1 | //======================================================================== |
2 | // |
3 | // pdfinfo.cc |
4 | // |
5 | // Copyright 1998-2003 Glyph & Cog, LLC |
6 | // Copyright 2013 Igalia S.L. |
7 | // |
8 | //======================================================================== |
9 | |
10 | //======================================================================== |
11 | // |
12 | // Modified under the Poppler project - http://poppler.freedesktop.org |
13 | // |
14 | // All changes made under the Poppler project to this file are licensed |
15 | // under GPL version 2 or later |
16 | // |
17 | // Copyright (C) 2006 Dom Lachowicz <cinamod@hotmail.com> |
18 | // Copyright (C) 2007-2010, 2012, 2016-2022 Albert Astals Cid <aacid@kde.org> |
19 | // Copyright (C) 2010 Hib Eris <hib@hiberis.nl> |
20 | // Copyright (C) 2011 Vittal Aithal <vittal.aithal@cognidox.com> |
21 | // Copyright (C) 2012, 2013, 2016-2018, 2021 Adrian Johnson <ajohnson@redneon.com> |
22 | // Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it> |
23 | // Copyright (C) 2013 Adrian Perez de Castro <aperez@igalia.com> |
24 | // Copyright (C) 2013 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp> |
25 | // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich |
26 | // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de> |
27 | // Copyright (C) 2018 Evangelos Rigas <erigas@rnd2.org> |
28 | // Copyright (C) 2019 Christian Persch <chpe@src.gnome.org> |
29 | // Copyright (C) 2019-2021 Oliver Sander <oliver.sander@tu-dresden.de> |
30 | // Copyright (C) 2019 Thomas Fischer <fischer@unix-ag.uni-kl.de> |
31 | // Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk> |
32 | // |
33 | // To see a description of the changes please see the Changelog file that |
34 | // came with your tarball or type make ChangeLog if you are building from git |
35 | // |
36 | //======================================================================== |
37 | |
38 | #include "config.h" |
39 | #include <poppler-config.h> |
40 | #include <cstdio> |
41 | #include <cstdlib> |
42 | #include <cstddef> |
43 | #include <cstring> |
44 | #include <ctime> |
45 | #include <cmath> |
46 | #include <map> |
47 | #include <set> |
48 | #include "parseargs.h" |
49 | #include "printencodings.h" |
50 | #include "goo/GooString.h" |
51 | #include "goo/gfile.h" |
52 | #include "goo/glibc.h" |
53 | #include "goo/gmem.h" |
54 | #include "GlobalParams.h" |
55 | #include "Object.h" |
56 | #include "Stream.h" |
57 | #include "Array.h" |
58 | #include "Dict.h" |
59 | #include "XRef.h" |
60 | #include "Catalog.h" |
61 | #include "Page.h" |
62 | #include "PDFDoc.h" |
63 | #include "PDFDocFactory.h" |
64 | #include "CharTypes.h" |
65 | #include "UnicodeMap.h" |
66 | #include "UTF.h" |
67 | #include "Error.h" |
68 | #include "DateInfo.h" |
69 | #include "JSInfo.h" |
70 | #include "StructTreeRoot.h" |
71 | #include "StructElement.h" |
72 | #include "Win32Console.h" |
73 | |
// Storage for the command-line options. Each variable is registered by
// address in the argDesc table and holds its default until parsing
// overwrites it.

// Page range selected with -f / -l.
static int firstPage { 1 };
static int lastPage { 0 };

// Which optional reports to print.
static bool printBoxes { false };
static bool printMetadata { false };
static bool printCustom { false };
static bool printJS { false };
static bool printStructure { false };
static bool printStructureText { false };
static bool printDests { false };
static bool printUrls { false };

// Date formatting selection (-isodates / -rawdates).
static bool isoDates { false };
static bool rawDates { false };

// String-valued options; argDesc passes sizeof() of each buffer as the limit.
// NOTE(review): the "\001" initial value presumably marks "no password
// supplied" -- confirm against the code that reads these buffers.
static char textEncName[128] { "" };
static char ownerPassword[33] { "\001" };
static char userPassword[33] { "\001" };

// Informational switches.
static bool printVersion { false };
static bool printHelp { false };
static bool printEnc { false };
92 | |
93 | static const ArgDesc argDesc[] = { { .arg: "-f" , .kind: argInt, .val: &firstPage, .size: 0, .usage: "first page to convert" }, |
94 | { .arg: "-l" , .kind: argInt, .val: &lastPage, .size: 0, .usage: "last page to convert" }, |
95 | { .arg: "-box" , .kind: argFlag, .val: &printBoxes, .size: 0, .usage: "print the page bounding boxes" }, |
96 | { .arg: "-meta" , .kind: argFlag, .val: &printMetadata, .size: 0, .usage: "print the document metadata (XML)" }, |
97 | { .arg: "-custom" , .kind: argFlag, .val: &printCustom, .size: 0, .usage: "print both custom and standard metadata" }, |
98 | { .arg: "-js" , .kind: argFlag, .val: &printJS, .size: 0, .usage: "print all JavaScript in the PDF" }, |
99 | { .arg: "-struct" , .kind: argFlag, .val: &printStructure, .size: 0, .usage: "print the logical document structure (for tagged files)" }, |
100 | { .arg: "-struct-text" , .kind: argFlag, .val: &printStructureText, .size: 0, .usage: "print text contents along with document structure (for tagged files)" }, |
101 | { .arg: "-isodates" , .kind: argFlag, .val: &isoDates, .size: 0, .usage: "print the dates in ISO-8601 format" }, |
102 | { .arg: "-rawdates" , .kind: argFlag, .val: &rawDates, .size: 0, .usage: "print the undecoded date strings directly from the PDF file" }, |
103 | { .arg: "-dests" , .kind: argFlag, .val: &printDests, .size: 0, .usage: "print all named destinations in the PDF" }, |
104 | { .arg: "-url" , .kind: argFlag, .val: &printUrls, .size: 0, .usage: "print all URLs inside PDF objects (does not scan text content)" }, |
105 | { .arg: "-enc" , .kind: argString, .val: textEncName, .size: sizeof(textEncName), .usage: "output text encoding name" }, |
106 | { .arg: "-listenc" , .kind: argFlag, .val: &printEnc, .size: 0, .usage: "list available encodings" }, |
107 | { .arg: "-opw" , .kind: argString, .val: ownerPassword, .size: sizeof(ownerPassword), .usage: "owner password (for encrypted files)" }, |
108 | { .arg: "-upw" , .kind: argString, .val: userPassword, .size: sizeof(userPassword), .usage: "user password (for encrypted files)" }, |
109 | { .arg: "-v" , .kind: argFlag, .val: &printVersion, .size: 0, .usage: "print copyright and version info" }, |
110 | { .arg: "-h" , .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" }, |
111 | { .arg: "-help" , .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" }, |
112 | { .arg: "--help" , .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" }, |
113 | { .arg: "-?" , .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" }, |
114 | {} }; |
115 | |
116 | static void printTextString(const GooString *s, const UnicodeMap *uMap) |
117 | { |
118 | char buf[8]; |
119 | std::vector<Unicode> u = TextStringToUCS4(textStr: s->toStr()); |
120 | for (const auto &c : u) { |
121 | int n = uMap->mapUnicode(u: c, buf, bufSize: sizeof(buf)); |
122 | fwrite(ptr: buf, size: 1, n: n, stdout); |
123 | } |
124 | } |
125 | |
126 | static void printUCS4String(const Unicode *u, int len, const UnicodeMap *uMap) |
127 | { |
128 | char buf[8]; |
129 | for (int i = 0; i < len; i++) { |
130 | int n = uMap->mapUnicode(u: u[i], buf, bufSize: sizeof(buf)); |
131 | fwrite(ptr: buf, size: 1, n: n, stdout); |
132 | } |
133 | } |
134 | |
135 | static void printInfoString(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap) |
136 | { |
137 | const GooString *s1; |
138 | |
139 | Object obj = infoDict->lookup(key); |
140 | if (obj.isString()) { |
141 | fputs(s: text, stdout); |
142 | s1 = obj.getString(); |
143 | printTextString(s: s1, uMap); |
144 | fputc(c: '\n', stdout); |
145 | } |
146 | } |
147 | |
148 | static void printInfoDate(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap) |
149 | { |
150 | int year, mon, day, hour, min, sec, tz_hour, tz_minute; |
151 | char tz; |
152 | struct tm tmStruct; |
153 | time_t time; |
154 | char buf[256]; |
155 | |
156 | Object obj = infoDict->lookup(key); |
157 | if (obj.isString()) { |
158 | fputs(s: text, stdout); |
159 | const GooString *s = obj.getString(); |
160 | // TODO do something with the timezone info |
161 | if (parseDateString(date: s, year: &year, month: &mon, day: &day, hour: &hour, minute: &min, second: &sec, tz: &tz, tzHour: &tz_hour, tzMinute: &tz_minute)) { |
162 | tmStruct.tm_year = year - 1900; |
163 | tmStruct.tm_mon = mon - 1; |
164 | tmStruct.tm_mday = day; |
165 | tmStruct.tm_hour = hour; |
166 | tmStruct.tm_min = min; |
167 | tmStruct.tm_sec = sec; |
168 | tmStruct.tm_wday = -1; |
169 | tmStruct.tm_yday = -1; |
170 | tmStruct.tm_isdst = -1; |
171 | // compute the tm_wday and tm_yday fields |
172 | time = timegm(tp: &tmStruct); |
173 | if (time != (time_t)-1) { |
174 | int offset = (tz_hour * 60 + tz_minute) * 60; |
175 | if (tz == '-') { |
176 | offset *= -1; |
177 | } |
178 | time -= offset; |
179 | localtime_r(timer: &time, tp: &tmStruct); |
180 | strftime(s: buf, maxsize: sizeof(buf), format: "%c %Z" , tp: &tmStruct); |
181 | fputs(s: buf, stdout); |
182 | } else { |
183 | printTextString(s, uMap); |
184 | } |
185 | } else { |
186 | printTextString(s, uMap); |
187 | } |
188 | fputc(c: '\n', stdout); |
189 | } |
190 | } |
191 | |
192 | static void printISODate(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap) |
193 | { |
194 | int year, mon, day, hour, min, sec, tz_hour, tz_minute; |
195 | char tz; |
196 | |
197 | Object obj = infoDict->lookup(key); |
198 | if (obj.isString()) { |
199 | fputs(s: text, stdout); |
200 | const GooString *s = obj.getString(); |
201 | if (parseDateString(date: s, year: &year, month: &mon, day: &day, hour: &hour, minute: &min, second: &sec, tz: &tz, tzHour: &tz_hour, tzMinute: &tz_minute)) { |
202 | fprintf(stdout, format: "%04d-%02d-%02dT%02d:%02d:%02d" , year, mon, day, hour, min, sec); |
203 | if (tz_hour == 0 && tz_minute == 0) { |
204 | fprintf(stdout, format: "Z" ); |
205 | } else { |
206 | fprintf(stdout, format: "%c%02d" , tz, tz_hour); |
207 | if (tz_minute) { |
208 | fprintf(stdout, format: ":%02d" , tz_minute); |
209 | } |
210 | } |
211 | } else { |
212 | printTextString(s: obj.getString(), uMap); |
213 | } |
214 | fputc(c: '\n', stdout); |
215 | } |
216 | } |
217 | |
218 | static void printBox(const char *text, const PDFRectangle *box) |
219 | { |
220 | printf(format: "%s%8.2f %8.2f %8.2f %8.2f\n" , text, box->x1, box->y1, box->x2, box->y2); |
221 | } |
222 | |
// Writes `indent` levels of indentation to stdout, two spaces per level.
static void printIndent(unsigned indent)
{
    for (unsigned level = 0; level < indent; ++level) {
        putchar(' ');
        putchar(' ');
    }
}
230 | |
231 | static void printAttribute(const Attribute *attribute, unsigned indent) |
232 | { |
233 | printIndent(indent); |
234 | printf(format: " /%s " , attribute->getTypeName()); |
235 | if (attribute->getType() == Attribute::UserProperty) { |
236 | std::unique_ptr<GooString> name = attribute->getName(); |
237 | printf(format: "(%s) " , name->c_str()); |
238 | } |
239 | attribute->getValue()->print(stdout); |
240 | if (attribute->getFormattedValue()) { |
241 | printf(format: " \"%s\"" , attribute->getFormattedValue()); |
242 | } |
243 | if (attribute->isHidden()) { |
244 | printf(format: " [hidden]" ); |
245 | } |
246 | } |
247 | |
248 | static void printStruct(const StructElement *element, unsigned indent) |
249 | { |
250 | if (element->isObjectRef()) { |
251 | printIndent(indent); |
252 | printf(format: "Object %i %i\n" , element->getObjectRef().num, element->getObjectRef().gen); |
253 | return; |
254 | } |
255 | |
256 | if (printStructureText && element->isContent()) { |
257 | GooString *text = element->getText(recursive: false); |
258 | printIndent(indent); |
259 | if (text) { |
260 | printf(format: "\"%s\"\n" , text->c_str()); |
261 | } else { |
262 | printf(format: "(No content?)\n" ); |
263 | } |
264 | delete text; |
265 | } |
266 | |
267 | if (!element->isContent()) { |
268 | printIndent(indent); |
269 | printf(format: "%s" , element->getTypeName()); |
270 | if (element->getID()) { |
271 | printf(format: " <%s>" , element->getID()->c_str()); |
272 | } |
273 | if (element->getTitle()) { |
274 | printf(format: " \"%s\"" , element->getTitle()->c_str()); |
275 | } |
276 | if (element->getRevision() > 0) { |
277 | printf(format: " r%u" , element->getRevision()); |
278 | } |
279 | if (element->isInline() || element->isBlock()) { |
280 | printf(format: " (%s)" , element->isInline() ? "inline" : "block" ); |
281 | } |
282 | if (element->getNumAttributes()) { |
283 | putchar(c: ':'); |
284 | for (unsigned i = 0; i < element->getNumAttributes(); i++) { |
285 | putchar(c: '\n'); |
286 | printAttribute(attribute: element->getAttribute(i), indent: indent + 1); |
287 | } |
288 | } |
289 | |
290 | putchar(c: '\n'); |
291 | for (unsigned i = 0; i < element->getNumChildren(); i++) { |
292 | printStruct(element: element->getChild(i), indent: indent + 1); |
293 | } |
294 | } |
295 | } |
296 | |
297 | struct GooStringCompare |
298 | { |
299 | bool operator()(GooString *lhs, GooString *rhs) const { return lhs->cmp(str: const_cast<GooString *>(rhs)) < 0; } |
300 | }; |
301 | |
302 | static void printLinkDest(const std::unique_ptr<LinkDest> &dest) |
303 | { |
304 | GooString s; |
305 | |
306 | switch (dest->getKind()) { |
307 | case destXYZ: |
308 | s.append(str: "[ XYZ " ); |
309 | if (dest->getChangeLeft()) { |
310 | s.appendf(fmt: "{0:4.0g} " , dest->getLeft()); |
311 | } else { |
312 | s.append(str: "null " ); |
313 | } |
314 | if (dest->getChangeTop()) { |
315 | s.appendf(fmt: "{0:4.0g} " , dest->getTop()); |
316 | } else { |
317 | s.append(str: "null " ); |
318 | } |
319 | if (dest->getChangeZoom()) { |
320 | s.appendf(fmt: "{0:4.2f} " , dest->getZoom()); |
321 | } else { |
322 | s.append(str: "null " ); |
323 | } |
324 | break; |
325 | case destFit: |
326 | s.append(str: "[ Fit " ); |
327 | break; |
328 | case destFitH: |
329 | if (dest->getChangeTop()) { |
330 | s.appendf(fmt: "[ FitH {0:4.0g} " , dest->getTop()); |
331 | } else { |
332 | s.append(str: "[ FitH null " ); |
333 | } |
334 | break; |
335 | case destFitV: |
336 | if (dest->getChangeLeft()) { |
337 | s.appendf(fmt: "[ FitV {0:4.0g} " , dest->getLeft()); |
338 | } else { |
339 | s.append(str: "[ FitV null " ); |
340 | } |
341 | break; |
342 | case destFitR: |
343 | s.appendf(fmt: "[ FitR {0:4.0g} {1:4.0g} {2:4.0g} {3:4.0g} " , dest->getLeft(), dest->getBottom(), dest->getRight(), dest->getTop()); |
344 | break; |
345 | case destFitB: |
346 | s.append(str: "[ FitB " ); |
347 | break; |
348 | case destFitBH: |
349 | if (dest->getChangeTop()) { |
350 | s.appendf(fmt: "[ FitBH {0:4.0g} " , dest->getTop()); |
351 | } else { |
352 | s.append(str: "[ FitBH null " ); |
353 | } |
354 | break; |
355 | case destFitBV: |
356 | if (dest->getChangeLeft()) { |
357 | s.appendf(fmt: "[ FitBV {0:4.0g} " , dest->getLeft()); |
358 | } else { |
359 | s.append(str: "[ FitBV null " ); |
360 | } |
361 | break; |
362 | } |
363 | |
364 | s.append(str: " " ); |
365 | s.setChar(i: 26, c: ']'); |
366 | s.setChar(i: 27, c: '\0'); |
367 | printf(format: "%s" , s.c_str()); |
368 | } |
369 | |
370 | static void printDestinations(PDFDoc *doc, const UnicodeMap *uMap) |
371 | { |
372 | std::map<Ref, std::map<GooString *, std::unique_ptr<LinkDest>, GooStringCompare>> map; |
373 | |
374 | int numDests = doc->getCatalog()->numDestNameTree(); |
375 | for (int i = 0; i < numDests; i++) { |
376 | GooString *name = new GooString(doc->getCatalog()->getDestNameTreeName(i)); |
377 | std::unique_ptr<LinkDest> dest = doc->getCatalog()->getDestNameTreeDest(i); |
378 | if (dest && dest->isPageRef()) { |
379 | Ref = dest->getPageRef(); |
380 | map[pageRef].insert(x: std::make_pair(x&: name, y: std::move(dest))); |
381 | } else { |
382 | delete name; |
383 | } |
384 | } |
385 | |
386 | numDests = doc->getCatalog()->numDests(); |
387 | for (int i = 0; i < numDests; i++) { |
388 | GooString *name = new GooString(doc->getCatalog()->getDestsName(i)); |
389 | std::unique_ptr<LinkDest> dest = doc->getCatalog()->getDestsDest(i); |
390 | if (dest && dest->isPageRef()) { |
391 | Ref = dest->getPageRef(); |
392 | map[pageRef].insert(x: std::make_pair(x&: name, y: std::move(dest))); |
393 | } else { |
394 | delete name; |
395 | } |
396 | } |
397 | |
398 | printf(format: "Page Destination Name\n" ); |
399 | for (int i = firstPage; i <= lastPage; i++) { |
400 | Ref *ref = doc->getCatalog()->getPageRef(i); |
401 | if (ref) { |
402 | auto pageDests = map.find(x: *ref); |
403 | if (pageDests != map.end()) { |
404 | for (auto &it : pageDests->second) { |
405 | printf(format: "%4d " , i); |
406 | printLinkDest(dest: it.second); |
407 | printf(format: " \"" ); |
408 | printTextString(s: it.first, uMap); |
409 | printf(format: "\"\n" ); |
410 | delete it.first; |
411 | } |
412 | } |
413 | } |
414 | } |
415 | } |
416 | |
417 | static void printUrlList(PDFDoc *doc) |
418 | { |
419 | printf(format: "Page Type URL\n" ); |
420 | for (int pg = firstPage; pg <= lastPage; pg++) { |
421 | Page *page = doc->getPage(page: pg); |
422 | if (page) { |
423 | std::unique_ptr<Links> links = page->getLinks(); |
424 | for (AnnotLink *annot : links->getLinks()) { |
425 | LinkAction *action = annot->getAction(); |
426 | if (action->getKind() == actionURI) { |
427 | LinkURI *linkUri = dynamic_cast<LinkURI *>(action); |
428 | std::string uri = linkUri->getURI(); |
429 | printf(format: "%4d Annotation %s\n" , pg, uri.c_str()); |
430 | } |
431 | } |
432 | } |
433 | } |
434 | } |
435 | |
436 | static void printPdfSubtype(PDFDoc *doc, const UnicodeMap *uMap) |
437 | { |
438 | const Object info = doc->getDocInfo(); |
439 | if (info.isDict()) { |
440 | const PDFSubtype pdftype = doc->getPDFSubtype(); |
441 | |
442 | if ((pdftype == subtypeNull) | (pdftype == subtypeNone)) { |
443 | return; |
444 | } |
445 | |
446 | std::unique_ptr<GooString> part; |
447 | std::unique_ptr<GooString> abbr; |
448 | std::unique_ptr<GooString> standard; |
449 | std::unique_ptr<GooString> typeExp; |
450 | std::unique_ptr<GooString> confExp; |
451 | |
452 | // Form title from PDFSubtype |
453 | switch (pdftype) { |
454 | case subtypePDFA: |
455 | printInfoString(infoDict: info.getDict(), key: "GTS_PDFA1Version" , text: "PDF subtype: " , uMap); |
456 | typeExp = std::make_unique<GooString>(args: "ISO 19005 - Electronic document file format for long-term preservation (PDF/A)" ); |
457 | standard = std::make_unique<GooString>(args: "ISO 19005" ); |
458 | abbr = std::make_unique<GooString>(args: "PDF/A" ); |
459 | break; |
460 | case subtypePDFE: |
461 | printInfoString(infoDict: info.getDict(), key: "GTS_PDFEVersion" , text: "PDF subtype: " , uMap); |
462 | typeExp = std::make_unique<GooString>(args: "ISO 24517 - Engineering document format using PDF (PDF/E)" ); |
463 | standard = std::make_unique<GooString>(args: "ISO 24517" ); |
464 | abbr = std::make_unique<GooString>(args: "PDF/E" ); |
465 | break; |
466 | case subtypePDFUA: |
467 | printInfoString(infoDict: info.getDict(), key: "GTS_PDFUAVersion" , text: "PDF subtype: " , uMap); |
468 | typeExp = std::make_unique<GooString>(args: "ISO 14289 - Electronic document file format enhancement for accessibility (PDF/UA)" ); |
469 | standard = std::make_unique<GooString>(args: "ISO 14289" ); |
470 | abbr = std::make_unique<GooString>(args: "PDF/UA" ); |
471 | break; |
472 | case subtypePDFVT: |
473 | printInfoString(infoDict: info.getDict(), key: "GTS_PDFVTVersion" , text: "PDF subtype: " , uMap); |
474 | typeExp = std::make_unique<GooString>(args: "ISO 16612 - Electronic document file format for variable data exchange (PDF/VT)" ); |
475 | standard = std::make_unique<GooString>(args: "ISO 16612" ); |
476 | abbr = std::make_unique<GooString>(args: "PDF/VT" ); |
477 | break; |
478 | case subtypePDFX: |
479 | printInfoString(infoDict: info.getDict(), key: "GTS_PDFXVersion" , text: "PDF subtype: " , uMap); |
480 | typeExp = std::make_unique<GooString>(args: "ISO 15930 - Electronic document file format for prepress digital data exchange (PDF/X)" ); |
481 | standard = std::make_unique<GooString>(args: "ISO 15930" ); |
482 | abbr = std::make_unique<GooString>(args: "PDF/X" ); |
483 | break; |
484 | case subtypeNone: |
485 | case subtypeNull: |
486 | default: |
487 | return; |
488 | } |
489 | |
490 | // Form the abbreviation from PDFSubtypePart and PDFSubtype |
491 | const PDFSubtypePart subpart = doc->getPDFSubtypePart(); |
492 | switch (pdftype) { |
493 | case subtypePDFX: |
494 | switch (subpart) { |
495 | case subtypePart1: |
496 | abbr->append(str: "-1:2001" ); |
497 | break; |
498 | case subtypePart2: |
499 | abbr->append(str: "-2" ); |
500 | break; |
501 | case subtypePart3: |
502 | abbr->append(str: "-3:2002" ); |
503 | break; |
504 | case subtypePart4: |
505 | abbr->append(str: "-1:2003" ); |
506 | break; |
507 | case subtypePart5: |
508 | abbr->append(str: "-2" ); |
509 | break; |
510 | case subtypePart6: |
511 | abbr->append(str: "-3:2003" ); |
512 | break; |
513 | case subtypePart7: |
514 | abbr->append(str: "-4" ); |
515 | break; |
516 | case subtypePart8: |
517 | abbr->append(str: "-5" ); |
518 | break; |
519 | default: |
520 | break; |
521 | } |
522 | break; |
523 | case subtypeNone: |
524 | case subtypeNull: |
525 | break; |
526 | default: |
527 | abbr->appendf(fmt: "-{0:d}" , subpart); |
528 | break; |
529 | } |
530 | |
531 | // Form standard from PDFSubtypePart |
532 | switch (subpart) { |
533 | case subtypePartNone: |
534 | case subtypePartNull: |
535 | break; |
536 | default: |
537 | standard->appendf(fmt: "-{0:d}" , subpart); |
538 | break; |
539 | } |
540 | |
541 | // Form the subtitle from PDFSubtypePart and PDFSubtype |
542 | switch (pdftype) { |
543 | case subtypePDFA: |
544 | switch (subpart) { |
545 | case subtypePart1: |
546 | part = std::make_unique<GooString>(args: "Use of PDF 1.4" ); |
547 | break; |
548 | case subtypePart2: |
549 | part = std::make_unique<GooString>(args: "Use of ISO 32000-1" ); |
550 | break; |
551 | case subtypePart3: |
552 | part = std::make_unique<GooString>(args: "Use of ISO 32000-1 with support for embedded files" ); |
553 | break; |
554 | default: |
555 | break; |
556 | } |
557 | break; |
558 | case subtypePDFE: |
559 | switch (subpart) { |
560 | case subtypePart1: |
561 | part = std::make_unique<GooString>(args: "Use of PDF 1.6" ); |
562 | break; |
563 | default: |
564 | break; |
565 | } |
566 | break; |
567 | case subtypePDFUA: |
568 | switch (subpart) { |
569 | case subtypePart1: |
570 | part = std::make_unique<GooString>(args: "Use of ISO 32000-1" ); |
571 | break; |
572 | case subtypePart2: |
573 | part = std::make_unique<GooString>(args: "Use of ISO 32000-2" ); |
574 | break; |
575 | case subtypePart3: |
576 | part = std::make_unique<GooString>(args: "Use of ISO 32000-1 with support for embedded files" ); |
577 | break; |
578 | default: |
579 | break; |
580 | } |
581 | break; |
582 | case subtypePDFVT: |
583 | switch (subpart) { |
584 | case subtypePart1: |
585 | part = std::make_unique<GooString>(args: "Using PPML 2.1 and PDF 1.4" ); |
586 | break; |
587 | case subtypePart2: |
588 | part = std::make_unique<GooString>(args: "Using PDF/X-4 and PDF/X-5 (PDF/VT-1 and PDF/VT-2)" ); |
589 | break; |
590 | case subtypePart3: |
591 | part = std::make_unique<GooString>(args: "Using PDF/X-6 (PDF/VT-3)" ); |
592 | break; |
593 | default: |
594 | break; |
595 | } |
596 | break; |
597 | case subtypePDFX: |
598 | switch (subpart) { |
599 | case subtypePart1: |
600 | part = std::make_unique<GooString>(args: "Complete exchange using CMYK data (PDF/X-1 and PDF/X-1a)" ); |
601 | break; |
602 | case subtypePart3: |
603 | part = std::make_unique<GooString>(args: "Complete exchange suitable for colour-managed workflows (PDF/X-3)" ); |
604 | break; |
605 | case subtypePart4: |
606 | part = std::make_unique<GooString>(args: "Complete exchange of CMYK and spot colour printing data using PDF 1.4 (PDF/X-1a)" ); |
607 | break; |
608 | case subtypePart5: |
609 | part = std::make_unique<GooString>(args: "Partial exchange of printing data using PDF 1.4 (PDF/X-2) [Withdrawn]" ); |
610 | break; |
611 | case subtypePart6: |
612 | part = std::make_unique<GooString>(args: "Complete exchange of printing data suitable for colour-managed workflows using PDF 1.4 (PDF/X-3)" ); |
613 | break; |
614 | case subtypePart7: |
615 | part = std::make_unique<GooString>(args: "Complete exchange of printing data (PDF/X-4) and partial exchange of printing data with external profile reference (PDF/X-4p) using PDF 1.6" ); |
616 | break; |
617 | case subtypePart8: |
618 | part = std::make_unique<GooString>(args: "Partial exchange of printing data using PDF 1.6 (PDF/X-5)" ); |
619 | break; |
620 | default: |
621 | break; |
622 | } |
623 | break; |
624 | default: |
625 | break; |
626 | } |
627 | |
628 | // Form Conformance explanation from PDFSubtypeConformance |
629 | switch (doc->getPDFSubtypeConformance()) { |
630 | case subtypeConfA: |
631 | confExp = std::make_unique<GooString>(args: "Level A, Accessible" ); |
632 | break; |
633 | case subtypeConfB: |
634 | confExp = std::make_unique<GooString>(args: "Level B, Basic" ); |
635 | break; |
636 | case subtypeConfG: |
637 | confExp = std::make_unique<GooString>(args: "Level G, External graphical content" ); |
638 | break; |
639 | case subtypeConfN: |
640 | confExp = std::make_unique<GooString>(args: "Level N, External ICC profile" ); |
641 | break; |
642 | case subtypeConfP: |
643 | confExp = std::make_unique<GooString>(args: "Level P, Embedded ICC profile" ); |
644 | break; |
645 | case subtypeConfPG: |
646 | confExp = std::make_unique<GooString>(args: "Level PG, Embedded ICC profile and external graphical content" ); |
647 | break; |
648 | case subtypeConfU: |
649 | confExp = std::make_unique<GooString>(args: "Level U, Unicode support" ); |
650 | break; |
651 | case subtypeConfNone: |
652 | case subtypeConfNull: |
653 | default: |
654 | confExp.reset(); |
655 | break; |
656 | } |
657 | |
658 | printf(format: " Title: %s\n" , typeExp->c_str()); |
659 | printf(format: " Abbreviation: %s\n" , abbr->c_str()); |
660 | if (part.get()) { |
661 | printf(format: " Subtitle: Part %d: %s\n" , subpart, part->c_str()); |
662 | } else { |
663 | printf(format: " Subtitle: Part %d\n" , subpart); |
664 | } |
665 | printf(format: " Standard: %s-%d\n" , typeExp->toStr().substr(pos: 0, n: 9).c_str(), subpart); |
666 | if (confExp.get()) { |
667 | printf(format: " Conformance: %s\n" , confExp->c_str()); |
668 | } |
669 | } |
670 | } |
671 | |
672 | static void printCustomInfo(PDFDoc *doc, const UnicodeMap *uMap) |
673 | { |
674 | Object info = doc->getDocInfo(); |
675 | if (info.isDict()) { |
676 | Dict *dict = info.getDict(); |
677 | |
678 | // Sort keys |
679 | std::set<std::string> keys; |
680 | for (int i = 0; i < dict->getLength(); i++) { |
681 | std::string key(dict->getKey(i)); |
682 | if (key != "Trapped" ) { |
683 | keys.insert(x: key); |
684 | } |
685 | } |
686 | |
687 | for (const std::string &key : keys) { |
688 | if (key == "CreationDate" ) { |
689 | if (isoDates) { |
690 | printISODate(infoDict: info.getDict(), key: "CreationDate" , text: "CreationDate: " , uMap); |
691 | } else if (rawDates) { |
692 | printInfoString(infoDict: info.getDict(), key: "CreationDate" , text: "CreationDate: " , uMap); |
693 | } else { |
694 | printInfoDate(infoDict: info.getDict(), key: "CreationDate" , text: "CreationDate: " , uMap); |
695 | } |
696 | } else if (key == "ModDate" ) { |
697 | if (isoDates) { |
698 | printISODate(infoDict: info.getDict(), key: "ModDate" , text: "ModDate: " , uMap); |
699 | } else if (rawDates) { |
700 | printInfoString(infoDict: info.getDict(), key: "ModDate" , text: "ModDate: " , uMap); |
701 | } else { |
702 | printInfoDate(infoDict: info.getDict(), key: "ModDate" , text: "ModDate: " , uMap); |
703 | } |
704 | } else { |
705 | Object obj = dict->lookup(key: key.c_str()); |
706 | if (obj.isString()) { |
707 | // print key |
708 | Unicode *u; |
709 | int len = utf8ToUCS4(utf8: key.c_str(), ucs4_out: &u); |
710 | printUCS4String(u, len, uMap); |
711 | fputs(s: ":" , stdout); |
712 | while (len < 16) { |
713 | fputs(s: " " , stdout); |
714 | len++; |
715 | } |
716 | gfree(p: u); |
717 | |
718 | // print value |
719 | GooString val_str(obj.getString()); |
720 | printTextString(s: &val_str, uMap); |
721 | fputc(c: '\n', stdout); |
722 | } |
723 | } |
724 | } |
725 | } |
726 | } |
727 | |
728 | static void printInfo(PDFDoc *doc, const UnicodeMap *uMap, long long filesize, bool multiPage) |
729 | { |
730 | Page *page; |
731 | char buf[256]; |
732 | double w, h, wISO, hISO, isoThreshold; |
733 | int pg, i; |
734 | int r; |
735 | |
736 | // print doc info |
737 | Object info = doc->getDocInfo(); |
738 | if (info.isDict()) { |
739 | printInfoString(infoDict: info.getDict(), key: "Title" , text: "Title: " , uMap); |
740 | printInfoString(infoDict: info.getDict(), key: "Subject" , text: "Subject: " , uMap); |
741 | printInfoString(infoDict: info.getDict(), key: "Keywords" , text: "Keywords: " , uMap); |
742 | printInfoString(infoDict: info.getDict(), key: "Author" , text: "Author: " , uMap); |
743 | printInfoString(infoDict: info.getDict(), key: "Creator" , text: "Creator: " , uMap); |
744 | printInfoString(infoDict: info.getDict(), key: "Producer" , text: "Producer: " , uMap); |
745 | if (isoDates) { |
746 | printISODate(infoDict: info.getDict(), key: "CreationDate" , text: "CreationDate: " , uMap); |
747 | printISODate(infoDict: info.getDict(), key: "ModDate" , text: "ModDate: " , uMap); |
748 | } else if (rawDates) { |
749 | printInfoString(infoDict: info.getDict(), key: "CreationDate" , text: "CreationDate: " , uMap); |
750 | printInfoString(infoDict: info.getDict(), key: "ModDate" , text: "ModDate: " , uMap); |
751 | } else { |
752 | printInfoDate(infoDict: info.getDict(), key: "CreationDate" , text: "CreationDate: " , uMap); |
753 | printInfoDate(infoDict: info.getDict(), key: "ModDate" , text: "ModDate: " , uMap); |
754 | } |
755 | } |
756 | |
757 | bool hasMetadata = false; |
758 | std::unique_ptr<GooString> metadata = doc->readMetadata(); |
759 | if (metadata) { |
760 | hasMetadata = true; |
761 | } |
762 | |
763 | const std::set<std::string> docInfoStandardKeys { "Title" , "Author" , "Subject" , "Keywords" , "Creator" , "Producer" , "CreationDate" , "ModDate" , "Trapped" }; |
764 | |
765 | bool hasCustom = false; |
766 | if (info.isDict()) { |
767 | Dict *dict = info.getDict(); |
768 | for (i = 0; i < dict->getLength(); i++) { |
769 | std::string key(dict->getKey(i)); |
770 | if (docInfoStandardKeys.find(x: key) == docInfoStandardKeys.end()) { |
771 | hasCustom = true; |
772 | break; |
773 | } |
774 | } |
775 | } |
776 | |
777 | // print metadata info |
778 | printf(format: "Custom Metadata: %s\n" , hasCustom ? "yes" : "no" ); |
779 | printf(format: "Metadata Stream: %s\n" , hasMetadata ? "yes" : "no" ); |
780 | |
781 | // print tagging info |
782 | printf(format: "Tagged: %s\n" , (doc->getCatalog()->getMarkInfo() & Catalog::markInfoMarked) ? "yes" : "no" ); |
783 | printf(format: "UserProperties: %s\n" , (doc->getCatalog()->getMarkInfo() & Catalog::markInfoUserProperties) ? "yes" : "no" ); |
784 | printf(format: "Suspects: %s\n" , (doc->getCatalog()->getMarkInfo() & Catalog::markInfoSuspects) ? "yes" : "no" ); |
785 | |
786 | // print form info |
787 | switch (doc->getCatalog()->getFormType()) { |
788 | case Catalog::NoForm: |
789 | printf(format: "Form: none\n" ); |
790 | break; |
791 | case Catalog::AcroForm: |
792 | printf(format: "Form: AcroForm\n" ); |
793 | break; |
794 | case Catalog::XfaForm: |
795 | printf(format: "Form: XFA\n" ); |
796 | break; |
797 | } |
798 | |
799 | // print javascript info |
800 | { |
801 | JSInfo jsInfo(doc, firstPage - 1); |
802 | jsInfo.scanJS(nPages: lastPage - firstPage + 1); |
803 | printf(format: "JavaScript: %s\n" , jsInfo.containsJS() ? "yes" : "no" ); |
804 | } |
805 | |
806 | // print page count |
807 | printf(format: "Pages: %d\n" , doc->getNumPages()); |
808 | |
809 | // print encryption info |
810 | printf(format: "Encrypted: " ); |
811 | if (doc->isEncrypted()) { |
812 | unsigned char *fileKey; |
813 | CryptAlgorithm encAlgorithm; |
814 | int keyLength; |
815 | doc->getXRef()->getEncryptionParameters(fileKeyA: &fileKey, encAlgorithmA: &encAlgorithm, keyLengthA: &keyLength); |
816 | |
817 | const char *encAlgorithmName = "unknown" ; |
818 | switch (encAlgorithm) { |
819 | case cryptRC4: |
820 | encAlgorithmName = "RC4" ; |
821 | break; |
822 | case cryptAES: |
823 | encAlgorithmName = "AES" ; |
824 | break; |
825 | case cryptAES256: |
826 | encAlgorithmName = "AES-256" ; |
827 | break; |
828 | case cryptNone: |
829 | break; |
830 | } |
831 | |
832 | printf(format: "yes (print:%s copy:%s change:%s addNotes:%s algorithm:%s)\n" , doc->okToPrint(ignoreOwnerPW: true) ? "yes" : "no" , doc->okToCopy(ignoreOwnerPW: true) ? "yes" : "no" , doc->okToChange(ignoreOwnerPW: true) ? "yes" : "no" , doc->okToAddNotes(ignoreOwnerPW: true) ? "yes" : "no" , |
833 | encAlgorithmName); |
834 | } else { |
835 | printf(format: "no\n" ); |
836 | } |
837 | |
838 | // print page size |
839 | for (pg = firstPage; pg <= lastPage; ++pg) { |
840 | w = doc->getPageCropWidth(page: pg); |
841 | h = doc->getPageCropHeight(page: pg); |
842 | if (multiPage) { |
843 | printf(format: "Page %4d size: %g x %g pts" , pg, w, h); |
844 | } else { |
845 | printf(format: "Page size: %g x %g pts" , w, h); |
846 | } |
847 | if ((fabs(x: w - 612) < 1 && fabs(x: h - 792) < 1) || (fabs(x: w - 792) < 1 && fabs(x: h - 612) < 1)) { |
848 | printf(format: " (letter)" ); |
849 | } else { |
850 | hISO = sqrt(x: sqrt(x: 2.0)) * 7200 / 2.54; |
851 | wISO = hISO / sqrt(x: 2.0); |
852 | isoThreshold = hISO * 0.003; ///< allow for 0.3% error when guessing conformance to ISO 216, A series |
853 | for (i = 0; i <= 6; ++i) { |
854 | if ((fabs(x: w - wISO) < isoThreshold && fabs(x: h - hISO) < isoThreshold) || (fabs(x: w - hISO) < isoThreshold && fabs(x: h - wISO) < isoThreshold)) { |
855 | printf(format: " (A%d)" , i); |
856 | break; |
857 | } |
858 | hISO = wISO; |
859 | wISO /= sqrt(x: 2.0); |
860 | isoThreshold /= sqrt(x: 2.0); |
861 | } |
862 | } |
863 | printf(format: "\n" ); |
864 | r = doc->getPageRotate(page: pg); |
865 | if (multiPage) { |
866 | printf(format: "Page %4d rot: %d\n" , pg, r); |
867 | } else { |
868 | printf(format: "Page rot: %d\n" , r); |
869 | } |
870 | } |
871 | |
872 | // print the boxes |
873 | if (printBoxes) { |
874 | if (multiPage) { |
875 | for (pg = firstPage; pg <= lastPage; ++pg) { |
876 | page = doc->getPage(page: pg); |
877 | if (!page) { |
878 | error(category: errSyntaxError, pos: -1, msg: "Failed to print boxes for page {0:d}" , pg); |
879 | continue; |
880 | } |
881 | sprintf(s: buf, format: "Page %4d MediaBox: " , pg); |
882 | printBox(text: buf, box: page->getMediaBox()); |
883 | sprintf(s: buf, format: "Page %4d CropBox: " , pg); |
884 | printBox(text: buf, box: page->getCropBox()); |
885 | sprintf(s: buf, format: "Page %4d BleedBox: " , pg); |
886 | printBox(text: buf, box: page->getBleedBox()); |
887 | sprintf(s: buf, format: "Page %4d TrimBox: " , pg); |
888 | printBox(text: buf, box: page->getTrimBox()); |
889 | sprintf(s: buf, format: "Page %4d ArtBox: " , pg); |
890 | printBox(text: buf, box: page->getArtBox()); |
891 | } |
892 | } else { |
893 | page = doc->getPage(page: firstPage); |
894 | if (!page) { |
895 | error(category: errSyntaxError, pos: -1, msg: "Failed to print boxes for page {0:d}" , firstPage); |
896 | } else { |
897 | printBox(text: "MediaBox: " , box: page->getMediaBox()); |
898 | printBox(text: "CropBox: " , box: page->getCropBox()); |
899 | printBox(text: "BleedBox: " , box: page->getBleedBox()); |
900 | printBox(text: "TrimBox: " , box: page->getTrimBox()); |
901 | printBox(text: "ArtBox: " , box: page->getArtBox()); |
902 | } |
903 | } |
904 | } |
905 | |
906 | // print file size |
907 | printf(format: "File size: %lld bytes\n" , filesize); |
908 | |
909 | // print linearization info |
910 | printf(format: "Optimized: %s\n" , doc->isLinearized() ? "yes" : "no" ); |
911 | |
912 | // print PDF version |
913 | printf(format: "PDF version: %d.%d\n" , doc->getPDFMajorVersion(), doc->getPDFMinorVersion()); |
914 | |
915 | printPdfSubtype(doc, uMap); |
916 | } |
917 | |
918 | int main(int argc, char *argv[]) |
919 | { |
920 | std::unique_ptr<PDFDoc> doc; |
921 | GooString *fileName; |
922 | std::optional<GooString> ownerPW, userPW; |
923 | const UnicodeMap *uMap; |
924 | FILE *f; |
925 | bool ok; |
926 | int exitCode; |
927 | bool multiPage; |
928 | |
929 | exitCode = 99; |
930 | |
931 | // parse args |
932 | Win32Console win32console(&argc, &argv); |
933 | ok = parseArgs(args: argDesc, argc: &argc, argv); |
934 | if (!ok || (argc != 2 && !printEnc) || printVersion || printHelp) { |
935 | fprintf(stderr, format: "pdfinfo version %s\n" , PACKAGE_VERSION); |
936 | fprintf(stderr, format: "%s\n" , popplerCopyright); |
937 | fprintf(stderr, format: "%s\n" , xpdfCopyright); |
938 | if (!printVersion) { |
939 | printUsage(program: "pdfinfo" , otherArgs: "<PDF-file>" , args: argDesc); |
940 | } |
941 | if (printVersion || printHelp) { |
942 | exitCode = 0; |
943 | } |
944 | goto err0; |
945 | } |
946 | |
947 | if (printStructureText) { |
948 | printStructure = true; |
949 | } |
950 | |
951 | // read config file |
952 | globalParams = std::make_unique<GlobalParams>(); |
953 | |
954 | if (printEnc) { |
955 | printEncodings(); |
956 | exitCode = 0; |
957 | goto err0; |
958 | } |
959 | |
960 | fileName = new GooString(argv[1]); |
961 | |
962 | if (textEncName[0]) { |
963 | globalParams->setTextEncoding(textEncName); |
964 | } |
965 | |
966 | // get mapping to output encoding |
967 | if (!(uMap = globalParams->getTextEncoding())) { |
968 | error(category: errCommandLine, pos: -1, msg: "Couldn't get text encoding" ); |
969 | delete fileName; |
970 | goto err1; |
971 | } |
972 | |
973 | // open PDF file |
974 | if (ownerPassword[0] != '\001') { |
975 | ownerPW = GooString(ownerPassword); |
976 | } |
977 | if (userPassword[0] != '\001') { |
978 | userPW = GooString(userPassword); |
979 | } |
980 | |
981 | if (fileName->cmp(sA: "-" ) == 0) { |
982 | delete fileName; |
983 | fileName = new GooString("fd://0" ); |
984 | } |
985 | |
986 | doc = PDFDocFactory().createPDFDoc(uri: *fileName, ownerPassword: ownerPW, userPassword: userPW); |
987 | |
988 | if (!doc->isOk()) { |
989 | exitCode = 1; |
990 | goto err2; |
991 | } |
992 | |
993 | // get page range |
994 | if (firstPage < 1) { |
995 | firstPage = 1; |
996 | } |
997 | if (lastPage == 0) { |
998 | multiPage = false; |
999 | } else { |
1000 | multiPage = true; |
1001 | } |
1002 | if (lastPage < 1 || lastPage > doc->getNumPages()) { |
1003 | lastPage = doc->getNumPages(); |
1004 | } |
1005 | if (lastPage < firstPage) { |
1006 | error(category: errCommandLine, pos: -1, msg: "Wrong page range given: the first page ({0:d}) can not be after the last page ({1:d})." , firstPage, lastPage); |
1007 | goto err2; |
1008 | } |
1009 | |
1010 | if (printMetadata) { |
1011 | // print the metadata |
1012 | const std::unique_ptr<GooString> metadata = doc->readMetadata(); |
1013 | if (metadata) { |
1014 | fputs(s: metadata->c_str(), stdout); |
1015 | fputc(c: '\n', stdout); |
1016 | } |
1017 | } else if (printCustom) { |
1018 | printCustomInfo(doc: doc.get(), uMap); |
1019 | } else if (printJS) { |
1020 | // print javascript |
1021 | JSInfo jsInfo(doc.get(), firstPage - 1); |
1022 | jsInfo.scanJS(nPages: lastPage - firstPage + 1, stdout, uMap); |
1023 | } else if (printStructure || printStructureText) { |
1024 | // print structure |
1025 | const StructTreeRoot *structTree = doc->getCatalog()->getStructTreeRoot(); |
1026 | if (structTree) { |
1027 | for (unsigned i = 0; i < structTree->getNumChildren(); i++) { |
1028 | printStruct(element: structTree->getChild(i), indent: 0); |
1029 | } |
1030 | } |
1031 | } else if (printDests) { |
1032 | printDestinations(doc: doc.get(), uMap); |
1033 | } else if (printUrls) { |
1034 | printUrlList(doc: doc.get()); |
1035 | } else { |
1036 | // print info |
1037 | long long filesize = 0; |
1038 | |
1039 | f = fopen(filename: fileName->c_str(), modes: "rb" ); |
1040 | if (f) { |
1041 | Gfseek(f, offset: 0, SEEK_END); |
1042 | filesize = Gftell(f); |
1043 | fclose(stream: f); |
1044 | } |
1045 | |
1046 | if (multiPage == false) { |
1047 | lastPage = 1; |
1048 | } |
1049 | |
1050 | printInfo(doc: doc.get(), uMap, filesize, multiPage); |
1051 | } |
1052 | exitCode = 0; |
1053 | |
1054 | // clean up |
1055 | err2: |
1056 | delete fileName; |
1057 | err1: |
1058 | err0: |
1059 | |
1060 | return exitCode; |
1061 | } |
1062 | |