1/*
2 * Copyright (C) 2009-2010, Pino Toscano <pino@kde.org>
3 * Copyright (C) 2017-2019, 2022, Albert Astals Cid <aacid@kde.org>
4 * Copyright (C) 2017, Jason Alan Palmer <jalanpalmer@gmail.com>
5 * Copyright (C) 2018, 2020, Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
6 * Copyright (C) 2019, Masamichi Hosoda <trueroad@trueroad.jp>
7 * Copyright (C) 2020, Jiri Jakes <freedesktop@jirijakes.eu>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
22 */
23
24#include <goo/glibc.h>
25#include <poppler-destination.h>
26#include <poppler-document.h>
27#include <poppler-embedded-file.h>
28#include <poppler-font.h>
29#include <poppler-page.h>
30#include <poppler-toc.h>
31#include <poppler-version.h>
32
33#include <cstdlib>
34#include <cstring>
35#include <ctime>
36#include <algorithm>
37#include <iomanip>
38#include <ios>
39#include <iostream>
40#include <map>
41#include <memory>
42#include <sstream>
43
44#include "parseargs.h"
45
46#include "config.h"
47
48static const int out_width = 30;
49
50bool show_all = false;
51bool show_info = false;
52bool show_perm = false;
53bool show_metadata = false;
54bool show_toc = false;
55bool show_fonts = false;
56bool show_embedded_files = false;
57bool show_pages = false;
58bool show_destinations = false;
59bool show_help = false;
60bool show_version = false;
61char show_text[32];
62bool show_text_list = false;
63bool show_text_list_with_font = false;
64poppler::page::text_layout_enum show_text_layout = poppler::page::physical_layout;
65
66static const ArgDesc the_args[] = { { .arg: "--show-all", .kind: argFlag, .val: &show_all, .size: 0, .usage: "show all the available information" },
67 { .arg: "--show-info", .kind: argFlag, .val: &show_info, .size: 0, .usage: "show general document information" },
68 { .arg: "--show-perm", .kind: argFlag, .val: &show_perm, .size: 0, .usage: "show document permissions" },
69 { .arg: "--show-metadata", .kind: argFlag, .val: &show_metadata, .size: 0, .usage: "show document metadata" },
70 { .arg: "--show-toc", .kind: argFlag, .val: &show_toc, .size: 0, .usage: "show the TOC" },
71 { .arg: "--show-fonts", .kind: argFlag, .val: &show_fonts, .size: 0, .usage: "show the document fonts" },
72 { .arg: "--show-embedded-files", .kind: argFlag, .val: &show_embedded_files, .size: 0, .usage: "show the document-level embedded files" },
73 { .arg: "--show-pages", .kind: argFlag, .val: &show_pages, .size: 0, .usage: "show pages information" },
74 { .arg: "--show-destinations", .kind: argFlag, .val: &show_destinations, .size: 0, .usage: "show named destinations" },
75 { .arg: "--show-text", .kind: argString, .val: &show_text, .size: sizeof(show_text), .usage: "show text (physical|raw|none) extracted from all pages" },
76 { .arg: "--show-text-list", .kind: argFlag, .val: &show_text_list, .size: 0, .usage: "show text list (experimental)" },
77 { .arg: "--show-text-list-with-font", .kind: argFlag, .val: &show_text_list_with_font, .size: 0, .usage: "show text list with font info (experimental)" },
78 { .arg: "-h", .kind: argFlag, .val: &show_help, .size: 0, .usage: "print usage information" },
79 { .arg: "--help", .kind: argFlag, .val: &show_help, .size: 0, .usage: "print usage information" },
80 { .arg: "--version", .kind: argFlag, .val: &show_version, .size: 0, .usage: "print poppler version" },
81 { .arg: nullptr, .kind: argFlag, .val: nullptr, .size: 0, .usage: nullptr } };
82
// Reports msg on standard error and terminates the process with exit status 1.
static void error(const std::string &msg)
{
    std::cerr << "Error: " << msg << std::endl
              << "Exiting..." << std::endl;
    std::exit(1);
}
89
90static std::ostream &operator<<(std::ostream &stream, const poppler::ustring &str)
91{
92 const poppler::byte_array ba = str.to_utf8();
93 for (const char c : ba) {
94 stream << c;
95 }
96 return stream;
97}
98
// Formats date as "dd/mm/YYYY HH:MM:SS" using the broken-down UTC time.
// Returns "n/a" when date is time_t(-1), or when the value cannot be
// converted or formatted.
static std::string out_date(std::time_t date)
{
    if (date == std::time_t(-1)) {
        return std::string("n/a");
    }

    struct tm utc;
    // gmtime_r() yields a null pointer on failure; utc must not be read in that case.
    if (!gmtime_r(&date, &utc)) {
        return std::string("n/a");
    }

    char buf[32];
    // strftime() returns 0 when the result does not fit, leaving buf undefined.
    if (strftime(buf, sizeof(buf), "%d/%m/%Y %H:%M:%S", &utc) == 0) {
        return std::string("n/a");
    }
    return std::string(buf);
}
111
// Renders a size as decimal text; negative values are reported as "n/a".
static std::string out_size(int size)
{
    if (size < 0) {
        return std::string("n/a");
    }
    std::ostringstream text;
    text << size;
    return text.str();
}
121
// Maps a nibble value to its lowercase hexadecimal digit:
// values below 10 are offset from '0', all others from 'a'.
static char charToHex(int x)
{
    if (x < 10) {
        return x + '0';
    }
    return x - 10 + 'a';
}
126
127static std::string out_hex_string(const poppler::byte_array &str)
128{
129 std::string ret(str.size() * 2, '\0');
130 const char *str_p = &str[0];
131 for (unsigned int i = 0; i < str.size(); ++i, ++str_p) {
132 ret[i * 2] = charToHex(x: (*str_p & 0xf0) >> 4);
133 ret[i * 2 + 1] = charToHex(x: *str_p & 0xf);
134 }
135 return ret;
136}
137
138static std::string out_page_orientation(poppler::page::orientation_enum o)
139{
140 switch (o) {
141 case poppler::page::landscape:
142 return "landscape (90)";
143 case poppler::page::portrait:
144 return "portrait (0)";
145 case poppler::page::seascape:
146 return "seascape (270)";
147 case poppler::page::upside_down:
148 return "upside_downs (180)";
149 };
150 return "<unknown orientation>";
151}
152
153static std::string out_font_info_type(poppler::font_info::type_enum t)
154{
155#define OUT_FONT_TYPE(thetype) \
156 case poppler::font_info::thetype: \
157 return #thetype
158 switch (t) {
159 OUT_FONT_TYPE(unknown);
160 OUT_FONT_TYPE(type1);
161 OUT_FONT_TYPE(type1c);
162 OUT_FONT_TYPE(type1c_ot);
163 OUT_FONT_TYPE(type3);
164 OUT_FONT_TYPE(truetype);
165 OUT_FONT_TYPE(truetype_ot);
166 OUT_FONT_TYPE(cid_type0);
167 OUT_FONT_TYPE(cid_type0c);
168 OUT_FONT_TYPE(cid_type0c_ot);
169 OUT_FONT_TYPE(cid_truetype);
170 OUT_FONT_TYPE(cid_truetype_ot);
171 }
172 return "<unknown font type>";
173#undef OUT_FONT_TYPE
174}
175
176static void print_info(poppler::document *doc)
177{
178 std::cout << "Document information:" << std::endl;
179 int major = 0, minor = 0;
180 doc->get_pdf_version(major: &major, minor: &minor);
181 std::cout << std::setw(out_width) << "PDF version"
182 << ": " << major << "." << minor << std::endl;
183 std::string permanent_id, update_id;
184 if (doc->get_pdf_id(permanent_id: &permanent_id, update_id: &update_id)) {
185 std::cout << std::setw(out_width) << "PDF IDs"
186 << ": P: " << permanent_id << " - U: " << update_id << std::endl;
187 } else {
188 std::cout << std::setw(out_width) << "PDF IDs"
189 << ": <none>" << std::endl;
190 }
191 const std::vector<std::string> keys = doc->info_keys();
192 std::vector<std::string>::const_iterator key_it = keys.begin(), key_end = keys.end();
193 for (; key_it != key_end; ++key_it) {
194 std::cout << std::setw(out_width) << *key_it << ": " << doc->info_key(key: *key_it) << std::endl;
195 }
196 std::cout << std::setw(out_width) << "Date (creation)"
197 << ": " << out_date(date: doc->info_date_t(key: "CreationDate")) << std::endl;
198 std::cout << std::setw(out_width) << "Date (modification)"
199 << ": " << out_date(date: doc->info_date_t(key: "ModDate")) << std::endl;
200 std::cout << std::setw(out_width) << "Number of pages"
201 << ": " << doc->pages() << std::endl;
202 std::cout << std::setw(out_width) << "Linearized"
203 << ": " << doc->is_linearized() << std::endl;
204 std::cout << std::setw(out_width) << "Encrypted"
205 << ": " << doc->is_encrypted() << std::endl;
206 std::cout << std::endl;
207}
208
209static void print_perm(poppler::document *doc)
210{
211 std::cout << "Document permissions:" << std::endl;
212#define OUT_PERM(theperm) std::cout << std::setw(out_width) << #theperm << ": " << doc->has_permission(poppler::perm_##theperm) << std::endl
213 OUT_PERM(print);
214 OUT_PERM(change);
215 OUT_PERM(copy);
216 OUT_PERM(add_notes);
217 OUT_PERM(fill_forms);
218 OUT_PERM(accessibility);
219 OUT_PERM(assemble);
220 OUT_PERM(print_high_resolution);
221 std::cout << std::endl;
222#undef OUT_PERM
223}
224
225static void print_metadata(poppler::document *doc)
226{
227 std::cout << std::setw(out_width) << "Metadata"
228 << ":" << std::endl
229 << doc->metadata() << std::endl;
230 std::cout << std::endl;
231}
232
233static void print_toc_item(poppler::toc_item *item, int indent)
234{
235 std::cout << std::setw(indent * 2) << " "
236 << "+ " << item->title() << " (" << item->is_open() << ")" << std::endl;
237 poppler::toc_item::iterator it = item->children_begin(), it_end = item->children_end();
238 for (; it != it_end; ++it) {
239 print_toc_item(item: *it, indent: indent + 1);
240 }
241}
242
243static void print_toc(poppler::toc *doctoc)
244{
245 std::cout << "Document TOC:" << std::endl;
246 if (doctoc) {
247 print_toc_item(item: doctoc->root(), indent: 0);
248 } else {
249 std::cout << "<no TOC>" << std::endl;
250 }
251 std::cout << std::endl;
252}
253
254static void print_fonts(poppler::document *doc)
255{
256 std::cout << "Document fonts:" << std::endl;
257 std::vector<poppler::font_info> fl = doc->fonts();
258 if (!fl.empty()) {
259 std::vector<poppler::font_info>::const_iterator it = fl.begin(), it_end = fl.end();
260 const std::ios_base::fmtflags f = std::cout.flags();
261 std::left(base&: std::cout);
262 for (; it != it_end; ++it) {
263 std::cout << " " << std::setw(out_width + 10) << it->name() << " " << std::setw(15) << out_font_info_type(t: it->type()) << " " << std::setw(5) << it->is_embedded() << " " << std::setw(5) << it->is_subset() << " " << it->file()
264 << std::endl;
265 }
266 std::cout.flags(fmtfl: f);
267 } else {
268 std::cout << "<no fonts>" << std::endl;
269 }
270 std::cout << std::endl;
271}
272
273static void print_embedded_files(poppler::document *doc)
274{
275 std::cout << "Document embedded files:" << std::endl;
276 std::vector<poppler::embedded_file *> ef = doc->embedded_files();
277 if (!ef.empty()) {
278 std::vector<poppler::embedded_file *>::const_iterator it = ef.begin(), it_end = ef.end();
279 const std::ios_base::fmtflags flags = std::cout.flags();
280 std::left(base&: std::cout);
281 for (; it != it_end; ++it) {
282 poppler::embedded_file *f = *it;
283 std::cout << " " << std::setw(out_width + 10) << f->name() << " " << std::setw(10) << out_size(size: f->size()) << " " << std::setw(20) << out_date(date: f->creation_date_t()) << " " << std::setw(20) << out_date(date: f->modification_date_t())
284 << std::endl
285 << " ";
286 if (f->description().empty()) {
287 std::cout << "<no description>";
288 } else {
289 std::cout << f->description();
290 }
291 std::cout << std::endl
292 << " " << std::setw(35) << (f->checksum().empty() ? std::string("<no checksum>") : out_hex_string(str: f->checksum())) << " " << (f->mime_type().empty() ? std::string("<no mime type>") : f->mime_type()) << std::endl;
293 }
294 std::cout.flags(fmtfl: flags);
295 } else {
296 std::cout << "<no embedded files>" << std::endl;
297 }
298 std::cout << std::endl;
299}
300
301static void print_page(poppler::page *p)
302{
303 if (p) {
304 std::cout << std::setw(out_width) << "Rect"
305 << ": " << p->page_rect() << std::endl;
306 std::cout << std::setw(out_width) << "Label"
307 << ": " << p->label() << std::endl;
308 std::cout << std::setw(out_width) << "Duration"
309 << ": " << p->duration() << std::endl;
310 std::cout << std::setw(out_width) << "Orientation"
311 << ": " << out_page_orientation(o: p->orientation()) << std::endl;
312 } else {
313 std::cout << std::setw(out_width) << "Broken Page. Could not be parsed" << std::endl;
314 }
315 std::cout << std::endl;
316}
317
318static void print_destination(const poppler::destination *d)
319{
320 if (d) {
321 std::cout << std::setw(out_width) << "Type"
322 << ": ";
323 switch (d->type()) {
324 case poppler::destination::unknown:
325 std::cout << "unknown" << std::endl;
326 break;
327 case poppler::destination::xyz:
328 std::cout << "xyz" << std::endl
329 << std::setw(out_width) << "Page"
330 << ": " << d->page_number() << std::endl
331 << std::setw(out_width) << "Left"
332 << ": " << d->left() << std::endl
333 << std::setw(out_width) << "Top"
334 << ": " << d->top() << std::endl
335 << std::setw(out_width) << "Zoom"
336 << ": " << d->zoom() << std::endl;
337 break;
338 case poppler::destination::fit:
339 std::cout << "fit" << std::endl
340 << std::setw(out_width) << "Page"
341 << ": " << d->page_number() << std::endl;
342 break;
343 case poppler::destination::fit_h:
344 std::cout << "fit_h" << std::endl
345 << std::setw(out_width) << "Page"
346 << ": " << d->page_number() << std::endl
347 << std::setw(out_width) << "Top"
348 << ": " << d->top() << std::endl;
349 break;
350 case poppler::destination::fit_v:
351 std::cout << "fit_v" << std::endl
352 << std::setw(out_width) << "Page"
353 << ": " << d->page_number() << std::endl
354 << std::setw(out_width) << "Left"
355 << ": " << d->left() << std::endl;
356 break;
357 case poppler::destination::fit_r:
358 std::cout << "fit_r" << std::endl
359 << std::setw(out_width) << "Page"
360 << ": " << d->page_number() << std::endl
361 << std::setw(out_width) << "Left"
362 << ": " << d->left() << std::endl
363 << std::setw(out_width) << "Bottom"
364 << ": " << d->bottom() << std::endl
365 << std::setw(out_width) << "Right"
366 << ": " << d->right() << std::endl
367 << std::setw(out_width) << "Top"
368 << ": " << d->top() << std::endl;
369 break;
370 case poppler::destination::fit_b:
371 std::cout << "fit_b" << std::endl
372 << std::setw(out_width) << "Page"
373 << ": " << d->page_number() << std::endl;
374 break;
375 case poppler::destination::fit_b_h:
376 std::cout << "fit_b_h" << std::endl
377 << std::setw(out_width) << "Page"
378 << ": " << d->page_number() << std::endl
379 << std::setw(out_width) << "Top"
380 << ": " << d->top() << std::endl;
381 break;
382 case poppler::destination::fit_b_v:
383 std::cout << "fit_b_v" << std::endl
384 << std::setw(out_width) << "Page"
385 << ": " << d->page_number() << std::endl
386 << std::setw(out_width) << "Left"
387 << ": " << d->left() << std::endl;
388 break;
389 default:
390 std::cout << "error" << std::endl;
391 break;
392 }
393 }
394 std::cout << std::endl;
395}
396
397static void print_page_text(poppler::page *p)
398{
399 if (p) {
400 std::cout << p->text(r: poppler::rectf(), layout_mode: show_text_layout) << std::endl;
401 } else {
402 std::cout << std::setw(out_width) << "Broken Page. Could not be parsed" << std::endl;
403 }
404 std::cout << std::endl;
405}
406
407static void print_page_text_list(poppler::page *p, int opt_flag = 0)
408{
409 if (!p) {
410 std::cout << std::setw(out_width) << "Broken Page. Could not be parsed" << std::endl;
411 std::cout << std::endl;
412 return;
413 }
414 auto text_list = p->text_list(opt_flag);
415
416 std::cout << "---" << std::endl;
417 for (const poppler::text_box &text : text_list) {
418 poppler::rectf bbox = text.bbox();
419 poppler::ustring ustr = text.text();
420 int wmode = text.get_wmode();
421 double font_size = text.get_font_size();
422 std::string font_name = text.get_font_name();
423 std::cout << "[" << ustr << "] @ ";
424 std::cout << "( x=" << bbox.x() << " y=" << bbox.y() << " w=" << bbox.width() << " h=" << bbox.height() << " )";
425 if (text.has_font_info()) {
426 std::cout << "( fontname=" << font_name << " fontsize=" << font_size << " wmode=" << wmode << " )";
427 }
428 std::cout << std::endl;
429 }
430 std::cout << "---" << std::endl;
431}
432
433int main(int argc, char *argv[])
434{
435 if (!parseArgs(args: the_args, argc: &argc, argv) || argc < 2 || show_help) {
436 printUsage(program: argv[0], otherArgs: "DOCUMENT", args: the_args);
437 exit(status: 1);
438 }
439
440 if (show_text[0]) {
441 if (!memcmp(s1: show_text, s2: "physical", n: 9)) {
442 show_text_layout = poppler::page::physical_layout;
443 } else if (!memcmp(s1: show_text, s2: "raw", n: 4)) {
444 show_text_layout = poppler::page::raw_order_layout;
445 } else if (!memcmp(s1: show_text, s2: "none", n: 5)) {
446 show_text_layout = poppler::page::non_raw_non_physical_layout;
447 } else {
448 error(msg: std::string("unrecognized text mode: '") + show_text + "'");
449 }
450 }
451
452 std::string file_name(argv[1]);
453
454 std::unique_ptr<poppler::document> doc(poppler::document::load_from_file(file_name));
455 if (!doc.get()) {
456 error(msg: "loading error");
457 }
458 if (doc->is_locked()) {
459 error(msg: "encrypted document");
460 }
461
462 std::cout.setf(std::ios_base::boolalpha);
463
464 if (show_all) {
465 show_info = true;
466 show_perm = true;
467 show_metadata = true;
468 show_toc = true;
469 show_fonts = true;
470 show_embedded_files = true;
471 show_pages = true;
472 }
473
474 if (show_version) {
475 std::cout << std::setw(out_width) << "Compiled"
476 << ": poppler-cpp " << POPPLER_VERSION << std::endl
477 << std::setw(out_width) << "Running"
478 << ": poppler-cpp " << poppler::version_string() << std::endl;
479 }
480 if (show_info) {
481 print_info(doc: doc.get());
482 }
483 if (show_perm) {
484 print_perm(doc: doc.get());
485 }
486 if (show_metadata) {
487 print_metadata(doc: doc.get());
488 }
489 if (show_toc) {
490 std::unique_ptr<poppler::toc> doctoc(doc->create_toc());
491 print_toc(doctoc: doctoc.get());
492 }
493 if (show_fonts) {
494 print_fonts(doc: doc.get());
495 }
496 if (show_embedded_files) {
497 print_embedded_files(doc: doc.get());
498 }
499 if (show_pages) {
500 const int pages = doc->pages();
501 for (int i = 0; i < pages; ++i) {
502 std::cout << "Page " << (i + 1) << "/" << pages << ":" << std::endl;
503 std::unique_ptr<poppler::page> p(doc->create_page(index: i));
504 print_page(p: p.get());
505 }
506 }
507 if (show_destinations) {
508 auto map = doc->create_destination_map();
509 for (const auto &pair : map) {
510 std::string s = pair.first;
511 for (auto &c : s) {
512 if (c < 0x20 || c > 0x7e) {
513 c = '.';
514 }
515 }
516 std::cout << "Named destination \"" << s << "\":" << std::endl;
517 print_destination(d: &pair.second);
518 }
519 }
520 if (show_text[0]) {
521 const int pages = doc->pages();
522 for (int i = 0; i < pages; ++i) {
523 std::cout << "Page " << (i + 1) << "/" << pages << ":" << std::endl;
524 std::unique_ptr<poppler::page> p(doc->create_page(index: i));
525 print_page_text(p: p.get());
526 }
527 }
528 if (show_text_list || show_text_list_with_font) {
529 const int pages = doc->pages();
530 for (int i = 0; i < pages; ++i) {
531 std::cout << "Page " << (i + 1) << "/" << pages << ":" << std::endl;
532 std::unique_ptr<poppler::page> p(doc->create_page(index: i));
533 if (show_text_list_with_font) {
534 print_page_text_list(p: p.get(), opt_flag: poppler::page::text_list_include_font);
535 } else {
536 print_page_text_list(p: p.get(), opt_flag: 0);
537 }
538 }
539 }
540
541 return 0;
542}
543

source code of poppler/cpp/tests/poppler-dump.cpp