1//========================================================================
2//
3// pdfinfo.cc
4//
5// Copyright 1998-2003 Glyph & Cog, LLC
6// Copyright 2013 Igalia S.L.
7//
8//========================================================================
9
10//========================================================================
11//
12// Modified under the Poppler project - http://poppler.freedesktop.org
13//
14// All changes made under the Poppler project to this file are licensed
15// under GPL version 2 or later
16//
17// Copyright (C) 2006 Dom Lachowicz <cinamod@hotmail.com>
18// Copyright (C) 2007-2010, 2012, 2016-2022 Albert Astals Cid <aacid@kde.org>
19// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
20// Copyright (C) 2011 Vittal Aithal <vittal.aithal@cognidox.com>
21// Copyright (C) 2012, 2013, 2016-2018, 2021 Adrian Johnson <ajohnson@redneon.com>
22// Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
23// Copyright (C) 2013 Adrian Perez de Castro <aperez@igalia.com>
24// Copyright (C) 2013 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
25// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
26// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
27// Copyright (C) 2018 Evangelos Rigas <erigas@rnd2.org>
28// Copyright (C) 2019 Christian Persch <chpe@src.gnome.org>
29// Copyright (C) 2019-2021 Oliver Sander <oliver.sander@tu-dresden.de>
30// Copyright (C) 2019 Thomas Fischer <fischer@unix-ag.uni-kl.de>
31// Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
32//
33// To see a description of the changes please see the Changelog file that
34// came with your tarball or type make ChangeLog if you are building from git
35//
36//========================================================================
37
38#include "config.h"
39#include <poppler-config.h>
40#include <cstdio>
41#include <cstdlib>
42#include <cstddef>
43#include <cstring>
44#include <ctime>
45#include <cmath>
46#include <map>
47#include <set>
48#include "parseargs.h"
49#include "printencodings.h"
50#include "goo/GooString.h"
51#include "goo/gfile.h"
52#include "goo/glibc.h"
53#include "goo/gmem.h"
54#include "GlobalParams.h"
55#include "Object.h"
56#include "Stream.h"
57#include "Array.h"
58#include "Dict.h"
59#include "XRef.h"
60#include "Catalog.h"
61#include "Page.h"
62#include "PDFDoc.h"
63#include "PDFDocFactory.h"
64#include "CharTypes.h"
65#include "UnicodeMap.h"
66#include "UTF.h"
67#include "Error.h"
68#include "DateInfo.h"
69#include "JSInfo.h"
70#include "StructTreeRoot.h"
71#include "StructElement.h"
72#include "Win32Console.h"
73
// Command-line option state. Each variable is bound to an option in the
// argDesc table below and is read by main() and the printing routines.

// Page range. lastPage == 0 means "-l" was not given.
static int firstPage { 1 };
static int lastPage { 0 };

// Which report to produce.
static bool printBoxes { false };
static bool printMetadata { false };
static bool printCustom { false };
static bool printJS { false };
static bool printStructure { false };
static bool printStructureText { false };
static bool printDests { false };
static bool printUrls { false };

// How document dates are rendered.
static bool isoDates { false };
static bool rawDates { false };

// Output encoding name; empty means the default encoding is kept.
static char textEncName[128] = "";

// Passwords. A leading '\001' marks "not supplied on the command line".
static char ownerPassword[33] = "\001";
static char userPassword[33] = "\001";

// Informational switches.
static bool printVersion { false };
static bool printHelp { false };
static bool printEnc { false };
92
93static const ArgDesc argDesc[] = { { .arg: "-f", .kind: argInt, .val: &firstPage, .size: 0, .usage: "first page to convert" },
94 { .arg: "-l", .kind: argInt, .val: &lastPage, .size: 0, .usage: "last page to convert" },
95 { .arg: "-box", .kind: argFlag, .val: &printBoxes, .size: 0, .usage: "print the page bounding boxes" },
96 { .arg: "-meta", .kind: argFlag, .val: &printMetadata, .size: 0, .usage: "print the document metadata (XML)" },
97 { .arg: "-custom", .kind: argFlag, .val: &printCustom, .size: 0, .usage: "print both custom and standard metadata" },
98 { .arg: "-js", .kind: argFlag, .val: &printJS, .size: 0, .usage: "print all JavaScript in the PDF" },
99 { .arg: "-struct", .kind: argFlag, .val: &printStructure, .size: 0, .usage: "print the logical document structure (for tagged files)" },
100 { .arg: "-struct-text", .kind: argFlag, .val: &printStructureText, .size: 0, .usage: "print text contents along with document structure (for tagged files)" },
101 { .arg: "-isodates", .kind: argFlag, .val: &isoDates, .size: 0, .usage: "print the dates in ISO-8601 format" },
102 { .arg: "-rawdates", .kind: argFlag, .val: &rawDates, .size: 0, .usage: "print the undecoded date strings directly from the PDF file" },
103 { .arg: "-dests", .kind: argFlag, .val: &printDests, .size: 0, .usage: "print all named destinations in the PDF" },
104 { .arg: "-url", .kind: argFlag, .val: &printUrls, .size: 0, .usage: "print all URLs inside PDF objects (does not scan text content)" },
105 { .arg: "-enc", .kind: argString, .val: textEncName, .size: sizeof(textEncName), .usage: "output text encoding name" },
106 { .arg: "-listenc", .kind: argFlag, .val: &printEnc, .size: 0, .usage: "list available encodings" },
107 { .arg: "-opw", .kind: argString, .val: ownerPassword, .size: sizeof(ownerPassword), .usage: "owner password (for encrypted files)" },
108 { .arg: "-upw", .kind: argString, .val: userPassword, .size: sizeof(userPassword), .usage: "user password (for encrypted files)" },
109 { .arg: "-v", .kind: argFlag, .val: &printVersion, .size: 0, .usage: "print copyright and version info" },
110 { .arg: "-h", .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" },
111 { .arg: "-help", .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" },
112 { .arg: "--help", .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" },
113 { .arg: "-?", .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" },
114 {} };
115
116static void printTextString(const GooString *s, const UnicodeMap *uMap)
117{
118 char buf[8];
119 std::vector<Unicode> u = TextStringToUCS4(textStr: s->toStr());
120 for (const auto &c : u) {
121 int n = uMap->mapUnicode(u: c, buf, bufSize: sizeof(buf));
122 fwrite(ptr: buf, size: 1, n: n, stdout);
123 }
124}
125
126static void printUCS4String(const Unicode *u, int len, const UnicodeMap *uMap)
127{
128 char buf[8];
129 for (int i = 0; i < len; i++) {
130 int n = uMap->mapUnicode(u: u[i], buf, bufSize: sizeof(buf));
131 fwrite(ptr: buf, size: 1, n: n, stdout);
132 }
133}
134
135static void printInfoString(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
136{
137 const GooString *s1;
138
139 Object obj = infoDict->lookup(key);
140 if (obj.isString()) {
141 fputs(s: text, stdout);
142 s1 = obj.getString();
143 printTextString(s: s1, uMap);
144 fputc(c: '\n', stdout);
145 }
146}
147
148static void printInfoDate(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
149{
150 int year, mon, day, hour, min, sec, tz_hour, tz_minute;
151 char tz;
152 struct tm tmStruct;
153 time_t time;
154 char buf[256];
155
156 Object obj = infoDict->lookup(key);
157 if (obj.isString()) {
158 fputs(s: text, stdout);
159 const GooString *s = obj.getString();
160 // TODO do something with the timezone info
161 if (parseDateString(date: s, year: &year, month: &mon, day: &day, hour: &hour, minute: &min, second: &sec, tz: &tz, tzHour: &tz_hour, tzMinute: &tz_minute)) {
162 tmStruct.tm_year = year - 1900;
163 tmStruct.tm_mon = mon - 1;
164 tmStruct.tm_mday = day;
165 tmStruct.tm_hour = hour;
166 tmStruct.tm_min = min;
167 tmStruct.tm_sec = sec;
168 tmStruct.tm_wday = -1;
169 tmStruct.tm_yday = -1;
170 tmStruct.tm_isdst = -1;
171 // compute the tm_wday and tm_yday fields
172 time = timegm(tp: &tmStruct);
173 if (time != (time_t)-1) {
174 int offset = (tz_hour * 60 + tz_minute) * 60;
175 if (tz == '-') {
176 offset *= -1;
177 }
178 time -= offset;
179 localtime_r(timer: &time, tp: &tmStruct);
180 strftime(s: buf, maxsize: sizeof(buf), format: "%c %Z", tp: &tmStruct);
181 fputs(s: buf, stdout);
182 } else {
183 printTextString(s, uMap);
184 }
185 } else {
186 printTextString(s, uMap);
187 }
188 fputc(c: '\n', stdout);
189 }
190}
191
192static void printISODate(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
193{
194 int year, mon, day, hour, min, sec, tz_hour, tz_minute;
195 char tz;
196
197 Object obj = infoDict->lookup(key);
198 if (obj.isString()) {
199 fputs(s: text, stdout);
200 const GooString *s = obj.getString();
201 if (parseDateString(date: s, year: &year, month: &mon, day: &day, hour: &hour, minute: &min, second: &sec, tz: &tz, tzHour: &tz_hour, tzMinute: &tz_minute)) {
202 fprintf(stdout, format: "%04d-%02d-%02dT%02d:%02d:%02d", year, mon, day, hour, min, sec);
203 if (tz_hour == 0 && tz_minute == 0) {
204 fprintf(stdout, format: "Z");
205 } else {
206 fprintf(stdout, format: "%c%02d", tz, tz_hour);
207 if (tz_minute) {
208 fprintf(stdout, format: ":%02d", tz_minute);
209 }
210 }
211 } else {
212 printTextString(s: obj.getString(), uMap);
213 }
214 fputc(c: '\n', stdout);
215 }
216}
217
218static void printBox(const char *text, const PDFRectangle *box)
219{
220 printf(format: "%s%8.2f %8.2f %8.2f %8.2f\n", text, box->x1, box->y1, box->x2, box->y2);
221}
222
// Writes two spaces to stdout for every indentation level.
static void printIndent(unsigned indent)
{
    for (unsigned level = 0; level < indent; ++level) {
        fputs("  ", stdout);
    }
}
230
231static void printAttribute(const Attribute *attribute, unsigned indent)
232{
233 printIndent(indent);
234 printf(format: " /%s ", attribute->getTypeName());
235 if (attribute->getType() == Attribute::UserProperty) {
236 std::unique_ptr<GooString> name = attribute->getName();
237 printf(format: "(%s) ", name->c_str());
238 }
239 attribute->getValue()->print(stdout);
240 if (attribute->getFormattedValue()) {
241 printf(format: " \"%s\"", attribute->getFormattedValue());
242 }
243 if (attribute->isHidden()) {
244 printf(format: " [hidden]");
245 }
246}
247
248static void printStruct(const StructElement *element, unsigned indent)
249{
250 if (element->isObjectRef()) {
251 printIndent(indent);
252 printf(format: "Object %i %i\n", element->getObjectRef().num, element->getObjectRef().gen);
253 return;
254 }
255
256 if (printStructureText && element->isContent()) {
257 GooString *text = element->getText(recursive: false);
258 printIndent(indent);
259 if (text) {
260 printf(format: "\"%s\"\n", text->c_str());
261 } else {
262 printf(format: "(No content?)\n");
263 }
264 delete text;
265 }
266
267 if (!element->isContent()) {
268 printIndent(indent);
269 printf(format: "%s", element->getTypeName());
270 if (element->getID()) {
271 printf(format: " <%s>", element->getID()->c_str());
272 }
273 if (element->getTitle()) {
274 printf(format: " \"%s\"", element->getTitle()->c_str());
275 }
276 if (element->getRevision() > 0) {
277 printf(format: " r%u", element->getRevision());
278 }
279 if (element->isInline() || element->isBlock()) {
280 printf(format: " (%s)", element->isInline() ? "inline" : "block");
281 }
282 if (element->getNumAttributes()) {
283 putchar(c: ':');
284 for (unsigned i = 0; i < element->getNumAttributes(); i++) {
285 putchar(c: '\n');
286 printAttribute(attribute: element->getAttribute(i), indent: indent + 1);
287 }
288 }
289
290 putchar(c: '\n');
291 for (unsigned i = 0; i < element->getNumChildren(); i++) {
292 printStruct(element: element->getChild(i), indent: indent + 1);
293 }
294 }
295}
296
297struct GooStringCompare
298{
299 bool operator()(GooString *lhs, GooString *rhs) const { return lhs->cmp(str: const_cast<GooString *>(rhs)) < 0; }
300};
301
302static void printLinkDest(const std::unique_ptr<LinkDest> &dest)
303{
304 GooString s;
305
306 switch (dest->getKind()) {
307 case destXYZ:
308 s.append(str: "[ XYZ ");
309 if (dest->getChangeLeft()) {
310 s.appendf(fmt: "{0:4.0g} ", dest->getLeft());
311 } else {
312 s.append(str: "null ");
313 }
314 if (dest->getChangeTop()) {
315 s.appendf(fmt: "{0:4.0g} ", dest->getTop());
316 } else {
317 s.append(str: "null ");
318 }
319 if (dest->getChangeZoom()) {
320 s.appendf(fmt: "{0:4.2f} ", dest->getZoom());
321 } else {
322 s.append(str: "null ");
323 }
324 break;
325 case destFit:
326 s.append(str: "[ Fit ");
327 break;
328 case destFitH:
329 if (dest->getChangeTop()) {
330 s.appendf(fmt: "[ FitH {0:4.0g} ", dest->getTop());
331 } else {
332 s.append(str: "[ FitH null ");
333 }
334 break;
335 case destFitV:
336 if (dest->getChangeLeft()) {
337 s.appendf(fmt: "[ FitV {0:4.0g} ", dest->getLeft());
338 } else {
339 s.append(str: "[ FitV null ");
340 }
341 break;
342 case destFitR:
343 s.appendf(fmt: "[ FitR {0:4.0g} {1:4.0g} {2:4.0g} {3:4.0g} ", dest->getLeft(), dest->getBottom(), dest->getRight(), dest->getTop());
344 break;
345 case destFitB:
346 s.append(str: "[ FitB ");
347 break;
348 case destFitBH:
349 if (dest->getChangeTop()) {
350 s.appendf(fmt: "[ FitBH {0:4.0g} ", dest->getTop());
351 } else {
352 s.append(str: "[ FitBH null ");
353 }
354 break;
355 case destFitBV:
356 if (dest->getChangeLeft()) {
357 s.appendf(fmt: "[ FitBV {0:4.0g} ", dest->getLeft());
358 } else {
359 s.append(str: "[ FitBV null ");
360 }
361 break;
362 }
363
364 s.append(str: " ");
365 s.setChar(i: 26, c: ']');
366 s.setChar(i: 27, c: '\0');
367 printf(format: "%s", s.c_str());
368}
369
370static void printDestinations(PDFDoc *doc, const UnicodeMap *uMap)
371{
372 std::map<Ref, std::map<GooString *, std::unique_ptr<LinkDest>, GooStringCompare>> map;
373
374 int numDests = doc->getCatalog()->numDestNameTree();
375 for (int i = 0; i < numDests; i++) {
376 GooString *name = new GooString(doc->getCatalog()->getDestNameTreeName(i));
377 std::unique_ptr<LinkDest> dest = doc->getCatalog()->getDestNameTreeDest(i);
378 if (dest && dest->isPageRef()) {
379 Ref pageRef = dest->getPageRef();
380 map[pageRef].insert(x: std::make_pair(x&: name, y: std::move(dest)));
381 } else {
382 delete name;
383 }
384 }
385
386 numDests = doc->getCatalog()->numDests();
387 for (int i = 0; i < numDests; i++) {
388 GooString *name = new GooString(doc->getCatalog()->getDestsName(i));
389 std::unique_ptr<LinkDest> dest = doc->getCatalog()->getDestsDest(i);
390 if (dest && dest->isPageRef()) {
391 Ref pageRef = dest->getPageRef();
392 map[pageRef].insert(x: std::make_pair(x&: name, y: std::move(dest)));
393 } else {
394 delete name;
395 }
396 }
397
398 printf(format: "Page Destination Name\n");
399 for (int i = firstPage; i <= lastPage; i++) {
400 Ref *ref = doc->getCatalog()->getPageRef(i);
401 if (ref) {
402 auto pageDests = map.find(x: *ref);
403 if (pageDests != map.end()) {
404 for (auto &it : pageDests->second) {
405 printf(format: "%4d ", i);
406 printLinkDest(dest: it.second);
407 printf(format: " \"");
408 printTextString(s: it.first, uMap);
409 printf(format: "\"\n");
410 delete it.first;
411 }
412 }
413 }
414 }
415}
416
417static void printUrlList(PDFDoc *doc)
418{
419 printf(format: "Page Type URL\n");
420 for (int pg = firstPage; pg <= lastPage; pg++) {
421 Page *page = doc->getPage(page: pg);
422 if (page) {
423 std::unique_ptr<Links> links = page->getLinks();
424 for (AnnotLink *annot : links->getLinks()) {
425 LinkAction *action = annot->getAction();
426 if (action->getKind() == actionURI) {
427 LinkURI *linkUri = dynamic_cast<LinkURI *>(action);
428 std::string uri = linkUri->getURI();
429 printf(format: "%4d Annotation %s\n", pg, uri.c_str());
430 }
431 }
432 }
433 }
434}
435
436static void printPdfSubtype(PDFDoc *doc, const UnicodeMap *uMap)
437{
438 const Object info = doc->getDocInfo();
439 if (info.isDict()) {
440 const PDFSubtype pdftype = doc->getPDFSubtype();
441
442 if ((pdftype == subtypeNull) | (pdftype == subtypeNone)) {
443 return;
444 }
445
446 std::unique_ptr<GooString> part;
447 std::unique_ptr<GooString> abbr;
448 std::unique_ptr<GooString> standard;
449 std::unique_ptr<GooString> typeExp;
450 std::unique_ptr<GooString> confExp;
451
452 // Form title from PDFSubtype
453 switch (pdftype) {
454 case subtypePDFA:
455 printInfoString(infoDict: info.getDict(), key: "GTS_PDFA1Version", text: "PDF subtype: ", uMap);
456 typeExp = std::make_unique<GooString>(args: "ISO 19005 - Electronic document file format for long-term preservation (PDF/A)");
457 standard = std::make_unique<GooString>(args: "ISO 19005");
458 abbr = std::make_unique<GooString>(args: "PDF/A");
459 break;
460 case subtypePDFE:
461 printInfoString(infoDict: info.getDict(), key: "GTS_PDFEVersion", text: "PDF subtype: ", uMap);
462 typeExp = std::make_unique<GooString>(args: "ISO 24517 - Engineering document format using PDF (PDF/E)");
463 standard = std::make_unique<GooString>(args: "ISO 24517");
464 abbr = std::make_unique<GooString>(args: "PDF/E");
465 break;
466 case subtypePDFUA:
467 printInfoString(infoDict: info.getDict(), key: "GTS_PDFUAVersion", text: "PDF subtype: ", uMap);
468 typeExp = std::make_unique<GooString>(args: "ISO 14289 - Electronic document file format enhancement for accessibility (PDF/UA)");
469 standard = std::make_unique<GooString>(args: "ISO 14289");
470 abbr = std::make_unique<GooString>(args: "PDF/UA");
471 break;
472 case subtypePDFVT:
473 printInfoString(infoDict: info.getDict(), key: "GTS_PDFVTVersion", text: "PDF subtype: ", uMap);
474 typeExp = std::make_unique<GooString>(args: "ISO 16612 - Electronic document file format for variable data exchange (PDF/VT)");
475 standard = std::make_unique<GooString>(args: "ISO 16612");
476 abbr = std::make_unique<GooString>(args: "PDF/VT");
477 break;
478 case subtypePDFX:
479 printInfoString(infoDict: info.getDict(), key: "GTS_PDFXVersion", text: "PDF subtype: ", uMap);
480 typeExp = std::make_unique<GooString>(args: "ISO 15930 - Electronic document file format for prepress digital data exchange (PDF/X)");
481 standard = std::make_unique<GooString>(args: "ISO 15930");
482 abbr = std::make_unique<GooString>(args: "PDF/X");
483 break;
484 case subtypeNone:
485 case subtypeNull:
486 default:
487 return;
488 }
489
490 // Form the abbreviation from PDFSubtypePart and PDFSubtype
491 const PDFSubtypePart subpart = doc->getPDFSubtypePart();
492 switch (pdftype) {
493 case subtypePDFX:
494 switch (subpart) {
495 case subtypePart1:
496 abbr->append(str: "-1:2001");
497 break;
498 case subtypePart2:
499 abbr->append(str: "-2");
500 break;
501 case subtypePart3:
502 abbr->append(str: "-3:2002");
503 break;
504 case subtypePart4:
505 abbr->append(str: "-1:2003");
506 break;
507 case subtypePart5:
508 abbr->append(str: "-2");
509 break;
510 case subtypePart6:
511 abbr->append(str: "-3:2003");
512 break;
513 case subtypePart7:
514 abbr->append(str: "-4");
515 break;
516 case subtypePart8:
517 abbr->append(str: "-5");
518 break;
519 default:
520 break;
521 }
522 break;
523 case subtypeNone:
524 case subtypeNull:
525 break;
526 default:
527 abbr->appendf(fmt: "-{0:d}", subpart);
528 break;
529 }
530
531 // Form standard from PDFSubtypePart
532 switch (subpart) {
533 case subtypePartNone:
534 case subtypePartNull:
535 break;
536 default:
537 standard->appendf(fmt: "-{0:d}", subpart);
538 break;
539 }
540
541 // Form the subtitle from PDFSubtypePart and PDFSubtype
542 switch (pdftype) {
543 case subtypePDFA:
544 switch (subpart) {
545 case subtypePart1:
546 part = std::make_unique<GooString>(args: "Use of PDF 1.4");
547 break;
548 case subtypePart2:
549 part = std::make_unique<GooString>(args: "Use of ISO 32000-1");
550 break;
551 case subtypePart3:
552 part = std::make_unique<GooString>(args: "Use of ISO 32000-1 with support for embedded files");
553 break;
554 default:
555 break;
556 }
557 break;
558 case subtypePDFE:
559 switch (subpart) {
560 case subtypePart1:
561 part = std::make_unique<GooString>(args: "Use of PDF 1.6");
562 break;
563 default:
564 break;
565 }
566 break;
567 case subtypePDFUA:
568 switch (subpart) {
569 case subtypePart1:
570 part = std::make_unique<GooString>(args: "Use of ISO 32000-1");
571 break;
572 case subtypePart2:
573 part = std::make_unique<GooString>(args: "Use of ISO 32000-2");
574 break;
575 case subtypePart3:
576 part = std::make_unique<GooString>(args: "Use of ISO 32000-1 with support for embedded files");
577 break;
578 default:
579 break;
580 }
581 break;
582 case subtypePDFVT:
583 switch (subpart) {
584 case subtypePart1:
585 part = std::make_unique<GooString>(args: "Using PPML 2.1 and PDF 1.4");
586 break;
587 case subtypePart2:
588 part = std::make_unique<GooString>(args: "Using PDF/X-4 and PDF/X-5 (PDF/VT-1 and PDF/VT-2)");
589 break;
590 case subtypePart3:
591 part = std::make_unique<GooString>(args: "Using PDF/X-6 (PDF/VT-3)");
592 break;
593 default:
594 break;
595 }
596 break;
597 case subtypePDFX:
598 switch (subpart) {
599 case subtypePart1:
600 part = std::make_unique<GooString>(args: "Complete exchange using CMYK data (PDF/X-1 and PDF/X-1a)");
601 break;
602 case subtypePart3:
603 part = std::make_unique<GooString>(args: "Complete exchange suitable for colour-managed workflows (PDF/X-3)");
604 break;
605 case subtypePart4:
606 part = std::make_unique<GooString>(args: "Complete exchange of CMYK and spot colour printing data using PDF 1.4 (PDF/X-1a)");
607 break;
608 case subtypePart5:
609 part = std::make_unique<GooString>(args: "Partial exchange of printing data using PDF 1.4 (PDF/X-2) [Withdrawn]");
610 break;
611 case subtypePart6:
612 part = std::make_unique<GooString>(args: "Complete exchange of printing data suitable for colour-managed workflows using PDF 1.4 (PDF/X-3)");
613 break;
614 case subtypePart7:
615 part = std::make_unique<GooString>(args: "Complete exchange of printing data (PDF/X-4) and partial exchange of printing data with external profile reference (PDF/X-4p) using PDF 1.6");
616 break;
617 case subtypePart8:
618 part = std::make_unique<GooString>(args: "Partial exchange of printing data using PDF 1.6 (PDF/X-5)");
619 break;
620 default:
621 break;
622 }
623 break;
624 default:
625 break;
626 }
627
628 // Form Conformance explanation from PDFSubtypeConformance
629 switch (doc->getPDFSubtypeConformance()) {
630 case subtypeConfA:
631 confExp = std::make_unique<GooString>(args: "Level A, Accessible");
632 break;
633 case subtypeConfB:
634 confExp = std::make_unique<GooString>(args: "Level B, Basic");
635 break;
636 case subtypeConfG:
637 confExp = std::make_unique<GooString>(args: "Level G, External graphical content");
638 break;
639 case subtypeConfN:
640 confExp = std::make_unique<GooString>(args: "Level N, External ICC profile");
641 break;
642 case subtypeConfP:
643 confExp = std::make_unique<GooString>(args: "Level P, Embedded ICC profile");
644 break;
645 case subtypeConfPG:
646 confExp = std::make_unique<GooString>(args: "Level PG, Embedded ICC profile and external graphical content");
647 break;
648 case subtypeConfU:
649 confExp = std::make_unique<GooString>(args: "Level U, Unicode support");
650 break;
651 case subtypeConfNone:
652 case subtypeConfNull:
653 default:
654 confExp.reset();
655 break;
656 }
657
658 printf(format: " Title: %s\n", typeExp->c_str());
659 printf(format: " Abbreviation: %s\n", abbr->c_str());
660 if (part.get()) {
661 printf(format: " Subtitle: Part %d: %s\n", subpart, part->c_str());
662 } else {
663 printf(format: " Subtitle: Part %d\n", subpart);
664 }
665 printf(format: " Standard: %s-%d\n", typeExp->toStr().substr(pos: 0, n: 9).c_str(), subpart);
666 if (confExp.get()) {
667 printf(format: " Conformance: %s\n", confExp->c_str());
668 }
669 }
670}
671
672static void printCustomInfo(PDFDoc *doc, const UnicodeMap *uMap)
673{
674 Object info = doc->getDocInfo();
675 if (info.isDict()) {
676 Dict *dict = info.getDict();
677
678 // Sort keys
679 std::set<std::string> keys;
680 for (int i = 0; i < dict->getLength(); i++) {
681 std::string key(dict->getKey(i));
682 if (key != "Trapped") {
683 keys.insert(x: key);
684 }
685 }
686
687 for (const std::string &key : keys) {
688 if (key == "CreationDate") {
689 if (isoDates) {
690 printISODate(infoDict: info.getDict(), key: "CreationDate", text: "CreationDate: ", uMap);
691 } else if (rawDates) {
692 printInfoString(infoDict: info.getDict(), key: "CreationDate", text: "CreationDate: ", uMap);
693 } else {
694 printInfoDate(infoDict: info.getDict(), key: "CreationDate", text: "CreationDate: ", uMap);
695 }
696 } else if (key == "ModDate") {
697 if (isoDates) {
698 printISODate(infoDict: info.getDict(), key: "ModDate", text: "ModDate: ", uMap);
699 } else if (rawDates) {
700 printInfoString(infoDict: info.getDict(), key: "ModDate", text: "ModDate: ", uMap);
701 } else {
702 printInfoDate(infoDict: info.getDict(), key: "ModDate", text: "ModDate: ", uMap);
703 }
704 } else {
705 Object obj = dict->lookup(key: key.c_str());
706 if (obj.isString()) {
707 // print key
708 Unicode *u;
709 int len = utf8ToUCS4(utf8: key.c_str(), ucs4_out: &u);
710 printUCS4String(u, len, uMap);
711 fputs(s: ":", stdout);
712 while (len < 16) {
713 fputs(s: " ", stdout);
714 len++;
715 }
716 gfree(p: u);
717
718 // print value
719 GooString val_str(obj.getString());
720 printTextString(s: &val_str, uMap);
721 fputc(c: '\n', stdout);
722 }
723 }
724 }
725 }
726}
727
728static void printInfo(PDFDoc *doc, const UnicodeMap *uMap, long long filesize, bool multiPage)
729{
730 Page *page;
731 char buf[256];
732 double w, h, wISO, hISO, isoThreshold;
733 int pg, i;
734 int r;
735
736 // print doc info
737 Object info = doc->getDocInfo();
738 if (info.isDict()) {
739 printInfoString(infoDict: info.getDict(), key: "Title", text: "Title: ", uMap);
740 printInfoString(infoDict: info.getDict(), key: "Subject", text: "Subject: ", uMap);
741 printInfoString(infoDict: info.getDict(), key: "Keywords", text: "Keywords: ", uMap);
742 printInfoString(infoDict: info.getDict(), key: "Author", text: "Author: ", uMap);
743 printInfoString(infoDict: info.getDict(), key: "Creator", text: "Creator: ", uMap);
744 printInfoString(infoDict: info.getDict(), key: "Producer", text: "Producer: ", uMap);
745 if (isoDates) {
746 printISODate(infoDict: info.getDict(), key: "CreationDate", text: "CreationDate: ", uMap);
747 printISODate(infoDict: info.getDict(), key: "ModDate", text: "ModDate: ", uMap);
748 } else if (rawDates) {
749 printInfoString(infoDict: info.getDict(), key: "CreationDate", text: "CreationDate: ", uMap);
750 printInfoString(infoDict: info.getDict(), key: "ModDate", text: "ModDate: ", uMap);
751 } else {
752 printInfoDate(infoDict: info.getDict(), key: "CreationDate", text: "CreationDate: ", uMap);
753 printInfoDate(infoDict: info.getDict(), key: "ModDate", text: "ModDate: ", uMap);
754 }
755 }
756
757 bool hasMetadata = false;
758 std::unique_ptr<GooString> metadata = doc->readMetadata();
759 if (metadata) {
760 hasMetadata = true;
761 }
762
763 const std::set<std::string> docInfoStandardKeys { "Title", "Author", "Subject", "Keywords", "Creator", "Producer", "CreationDate", "ModDate", "Trapped" };
764
765 bool hasCustom = false;
766 if (info.isDict()) {
767 Dict *dict = info.getDict();
768 for (i = 0; i < dict->getLength(); i++) {
769 std::string key(dict->getKey(i));
770 if (docInfoStandardKeys.find(x: key) == docInfoStandardKeys.end()) {
771 hasCustom = true;
772 break;
773 }
774 }
775 }
776
777 // print metadata info
778 printf(format: "Custom Metadata: %s\n", hasCustom ? "yes" : "no");
779 printf(format: "Metadata Stream: %s\n", hasMetadata ? "yes" : "no");
780
781 // print tagging info
782 printf(format: "Tagged: %s\n", (doc->getCatalog()->getMarkInfo() & Catalog::markInfoMarked) ? "yes" : "no");
783 printf(format: "UserProperties: %s\n", (doc->getCatalog()->getMarkInfo() & Catalog::markInfoUserProperties) ? "yes" : "no");
784 printf(format: "Suspects: %s\n", (doc->getCatalog()->getMarkInfo() & Catalog::markInfoSuspects) ? "yes" : "no");
785
786 // print form info
787 switch (doc->getCatalog()->getFormType()) {
788 case Catalog::NoForm:
789 printf(format: "Form: none\n");
790 break;
791 case Catalog::AcroForm:
792 printf(format: "Form: AcroForm\n");
793 break;
794 case Catalog::XfaForm:
795 printf(format: "Form: XFA\n");
796 break;
797 }
798
799 // print javascript info
800 {
801 JSInfo jsInfo(doc, firstPage - 1);
802 jsInfo.scanJS(nPages: lastPage - firstPage + 1);
803 printf(format: "JavaScript: %s\n", jsInfo.containsJS() ? "yes" : "no");
804 }
805
806 // print page count
807 printf(format: "Pages: %d\n", doc->getNumPages());
808
809 // print encryption info
810 printf(format: "Encrypted: ");
811 if (doc->isEncrypted()) {
812 unsigned char *fileKey;
813 CryptAlgorithm encAlgorithm;
814 int keyLength;
815 doc->getXRef()->getEncryptionParameters(fileKeyA: &fileKey, encAlgorithmA: &encAlgorithm, keyLengthA: &keyLength);
816
817 const char *encAlgorithmName = "unknown";
818 switch (encAlgorithm) {
819 case cryptRC4:
820 encAlgorithmName = "RC4";
821 break;
822 case cryptAES:
823 encAlgorithmName = "AES";
824 break;
825 case cryptAES256:
826 encAlgorithmName = "AES-256";
827 break;
828 case cryptNone:
829 break;
830 }
831
832 printf(format: "yes (print:%s copy:%s change:%s addNotes:%s algorithm:%s)\n", doc->okToPrint(ignoreOwnerPW: true) ? "yes" : "no", doc->okToCopy(ignoreOwnerPW: true) ? "yes" : "no", doc->okToChange(ignoreOwnerPW: true) ? "yes" : "no", doc->okToAddNotes(ignoreOwnerPW: true) ? "yes" : "no",
833 encAlgorithmName);
834 } else {
835 printf(format: "no\n");
836 }
837
838 // print page size
839 for (pg = firstPage; pg <= lastPage; ++pg) {
840 w = doc->getPageCropWidth(page: pg);
841 h = doc->getPageCropHeight(page: pg);
842 if (multiPage) {
843 printf(format: "Page %4d size: %g x %g pts", pg, w, h);
844 } else {
845 printf(format: "Page size: %g x %g pts", w, h);
846 }
847 if ((fabs(x: w - 612) < 1 && fabs(x: h - 792) < 1) || (fabs(x: w - 792) < 1 && fabs(x: h - 612) < 1)) {
848 printf(format: " (letter)");
849 } else {
850 hISO = sqrt(x: sqrt(x: 2.0)) * 7200 / 2.54;
851 wISO = hISO / sqrt(x: 2.0);
852 isoThreshold = hISO * 0.003; ///< allow for 0.3% error when guessing conformance to ISO 216, A series
853 for (i = 0; i <= 6; ++i) {
854 if ((fabs(x: w - wISO) < isoThreshold && fabs(x: h - hISO) < isoThreshold) || (fabs(x: w - hISO) < isoThreshold && fabs(x: h - wISO) < isoThreshold)) {
855 printf(format: " (A%d)", i);
856 break;
857 }
858 hISO = wISO;
859 wISO /= sqrt(x: 2.0);
860 isoThreshold /= sqrt(x: 2.0);
861 }
862 }
863 printf(format: "\n");
864 r = doc->getPageRotate(page: pg);
865 if (multiPage) {
866 printf(format: "Page %4d rot: %d\n", pg, r);
867 } else {
868 printf(format: "Page rot: %d\n", r);
869 }
870 }
871
872 // print the boxes
873 if (printBoxes) {
874 if (multiPage) {
875 for (pg = firstPage; pg <= lastPage; ++pg) {
876 page = doc->getPage(page: pg);
877 if (!page) {
878 error(category: errSyntaxError, pos: -1, msg: "Failed to print boxes for page {0:d}", pg);
879 continue;
880 }
881 sprintf(s: buf, format: "Page %4d MediaBox: ", pg);
882 printBox(text: buf, box: page->getMediaBox());
883 sprintf(s: buf, format: "Page %4d CropBox: ", pg);
884 printBox(text: buf, box: page->getCropBox());
885 sprintf(s: buf, format: "Page %4d BleedBox: ", pg);
886 printBox(text: buf, box: page->getBleedBox());
887 sprintf(s: buf, format: "Page %4d TrimBox: ", pg);
888 printBox(text: buf, box: page->getTrimBox());
889 sprintf(s: buf, format: "Page %4d ArtBox: ", pg);
890 printBox(text: buf, box: page->getArtBox());
891 }
892 } else {
893 page = doc->getPage(page: firstPage);
894 if (!page) {
895 error(category: errSyntaxError, pos: -1, msg: "Failed to print boxes for page {0:d}", firstPage);
896 } else {
897 printBox(text: "MediaBox: ", box: page->getMediaBox());
898 printBox(text: "CropBox: ", box: page->getCropBox());
899 printBox(text: "BleedBox: ", box: page->getBleedBox());
900 printBox(text: "TrimBox: ", box: page->getTrimBox());
901 printBox(text: "ArtBox: ", box: page->getArtBox());
902 }
903 }
904 }
905
906 // print file size
907 printf(format: "File size: %lld bytes\n", filesize);
908
909 // print linearization info
910 printf(format: "Optimized: %s\n", doc->isLinearized() ? "yes" : "no");
911
912 // print PDF version
913 printf(format: "PDF version: %d.%d\n", doc->getPDFMajorVersion(), doc->getPDFMinorVersion());
914
915 printPdfSubtype(doc, uMap);
916}
917
918int main(int argc, char *argv[])
919{
920 std::unique_ptr<PDFDoc> doc;
921 GooString *fileName;
922 std::optional<GooString> ownerPW, userPW;
923 const UnicodeMap *uMap;
924 FILE *f;
925 bool ok;
926 int exitCode;
927 bool multiPage;
928
929 exitCode = 99;
930
931 // parse args
932 Win32Console win32console(&argc, &argv);
933 ok = parseArgs(args: argDesc, argc: &argc, argv);
934 if (!ok || (argc != 2 && !printEnc) || printVersion || printHelp) {
935 fprintf(stderr, format: "pdfinfo version %s\n", PACKAGE_VERSION);
936 fprintf(stderr, format: "%s\n", popplerCopyright);
937 fprintf(stderr, format: "%s\n", xpdfCopyright);
938 if (!printVersion) {
939 printUsage(program: "pdfinfo", otherArgs: "<PDF-file>", args: argDesc);
940 }
941 if (printVersion || printHelp) {
942 exitCode = 0;
943 }
944 goto err0;
945 }
946
947 if (printStructureText) {
948 printStructure = true;
949 }
950
951 // read config file
952 globalParams = std::make_unique<GlobalParams>();
953
954 if (printEnc) {
955 printEncodings();
956 exitCode = 0;
957 goto err0;
958 }
959
960 fileName = new GooString(argv[1]);
961
962 if (textEncName[0]) {
963 globalParams->setTextEncoding(textEncName);
964 }
965
966 // get mapping to output encoding
967 if (!(uMap = globalParams->getTextEncoding())) {
968 error(category: errCommandLine, pos: -1, msg: "Couldn't get text encoding");
969 delete fileName;
970 goto err1;
971 }
972
973 // open PDF file
974 if (ownerPassword[0] != '\001') {
975 ownerPW = GooString(ownerPassword);
976 }
977 if (userPassword[0] != '\001') {
978 userPW = GooString(userPassword);
979 }
980
981 if (fileName->cmp(sA: "-") == 0) {
982 delete fileName;
983 fileName = new GooString("fd://0");
984 }
985
986 doc = PDFDocFactory().createPDFDoc(uri: *fileName, ownerPassword: ownerPW, userPassword: userPW);
987
988 if (!doc->isOk()) {
989 exitCode = 1;
990 goto err2;
991 }
992
993 // get page range
994 if (firstPage < 1) {
995 firstPage = 1;
996 }
997 if (lastPage == 0) {
998 multiPage = false;
999 } else {
1000 multiPage = true;
1001 }
1002 if (lastPage < 1 || lastPage > doc->getNumPages()) {
1003 lastPage = doc->getNumPages();
1004 }
1005 if (lastPage < firstPage) {
1006 error(category: errCommandLine, pos: -1, msg: "Wrong page range given: the first page ({0:d}) can not be after the last page ({1:d}).", firstPage, lastPage);
1007 goto err2;
1008 }
1009
1010 if (printMetadata) {
1011 // print the metadata
1012 const std::unique_ptr<GooString> metadata = doc->readMetadata();
1013 if (metadata) {
1014 fputs(s: metadata->c_str(), stdout);
1015 fputc(c: '\n', stdout);
1016 }
1017 } else if (printCustom) {
1018 printCustomInfo(doc: doc.get(), uMap);
1019 } else if (printJS) {
1020 // print javascript
1021 JSInfo jsInfo(doc.get(), firstPage - 1);
1022 jsInfo.scanJS(nPages: lastPage - firstPage + 1, stdout, uMap);
1023 } else if (printStructure || printStructureText) {
1024 // print structure
1025 const StructTreeRoot *structTree = doc->getCatalog()->getStructTreeRoot();
1026 if (structTree) {
1027 for (unsigned i = 0; i < structTree->getNumChildren(); i++) {
1028 printStruct(element: structTree->getChild(i), indent: 0);
1029 }
1030 }
1031 } else if (printDests) {
1032 printDestinations(doc: doc.get(), uMap);
1033 } else if (printUrls) {
1034 printUrlList(doc: doc.get());
1035 } else {
1036 // print info
1037 long long filesize = 0;
1038
1039 f = fopen(filename: fileName->c_str(), modes: "rb");
1040 if (f) {
1041 Gfseek(f, offset: 0, SEEK_END);
1042 filesize = Gftell(f);
1043 fclose(stream: f);
1044 }
1045
1046 if (multiPage == false) {
1047 lastPage = 1;
1048 }
1049
1050 printInfo(doc: doc.get(), uMap, filesize, multiPage);
1051 }
1052 exitCode = 0;
1053
1054 // clean up
1055err2:
1056 delete fileName;
1057err1:
1058err0:
1059
1060 return exitCode;
1061}
1062

// source code of poppler/utils/pdfinfo.cc