1 | //======================================================================== |
2 | // |
3 | // pdfdetach.cc |
4 | // |
5 | // Copyright 2010 Glyph & Cog, LLC |
6 | // |
7 | //======================================================================== |
8 | |
9 | //======================================================================== |
10 | // |
11 | // Modified under the Poppler project - http://poppler.freedesktop.org |
12 | // |
13 | // All changes made under the Poppler project to this file are licensed |
14 | // under GPL version 2 or later |
15 | // |
16 | // Copyright (C) 2011 Carlos Garcia Campos <carlosgc@gnome.org> |
17 | // Copyright (C) 2013 Yury G. Kudryashov <urkud.urkud@gmail.com> |
18 | // Copyright (C) 2014, 2017 Adrian Johnson <ajohnson@redneon.com> |
19 | // Copyright (C) 2018, 2020, 2022, 2024 Albert Astals Cid <aacid@kde.org> |
20 | // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de> |
21 | // Copyright (C) 2019, 2021, 2024 Oliver Sander <oliver.sander@tu-dresden.de> |
22 | // Copyright (C) 2020 <r.coeffier@bee-buzziness.com> |
23 | // Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk> |
24 | // |
25 | // To see a description of the changes please see the Changelog file that |
26 | // came with your tarball or type make ChangeLog if you are building from git |
27 | // |
28 | //======================================================================== |
29 | |
30 | #include "config.h" |
31 | #include <poppler-config.h> |
32 | #include <cstdio> |
33 | #include "goo/gmem.h" |
34 | #include "parseargs.h" |
35 | #include "Annot.h" |
36 | #include "GlobalParams.h" |
37 | #include "Page.h" |
38 | #include "PDFDoc.h" |
39 | #include "PDFDocFactory.h" |
40 | #include "FileSpec.h" |
41 | #include "CharTypes.h" |
42 | #include "Catalog.h" |
43 | #include "UnicodeMap.h" |
44 | #include "PDFDocEncoding.h" |
45 | #include "Error.h" |
46 | #include "UTF.h" |
47 | #include "Win32Console.h" |
48 | |
49 | #include <filesystem> |
50 | |
// Command-line state. Each variable is the storage target of one entry in
// argDesc and is filled in by parseArgs() before main() inspects it.

// Operation selectors: main() requires exactly one of doList, saveNum,
// saveFile or saveAll to be set.
static bool doList = false;
static bool saveAll = false;
static int saveNum = 0;
static char saveFile[128] = { '\0' };

// Output location (-o) and output text encoding name (-enc); empty means
// "not given".
static char savePath[1024] = { '\0' };
static char textEncName[128] = { '\0' };

// Passwords start with the sentinel byte \001; main() treats a buffer still
// starting with that byte as "no password supplied".
static char ownerPassword[33] = "\001";
static char userPassword[33] = "\001";

// Informational switches (-v, -h and its aliases).
static bool printVersion = false;
static bool printHelp = false;
61 | |
62 | static const ArgDesc argDesc[] = { { .arg: "-list" , .kind: argFlag, .val: &doList, .size: 0, .usage: "list all embedded files" }, |
63 | { .arg: "-save" , .kind: argInt, .val: &saveNum, .size: 0, .usage: "save the specified embedded file (file number)" }, |
64 | { .arg: "-savefile" , .kind: argString, .val: &saveFile, .size: sizeof(saveFile), .usage: "save the specified embedded file (file name)" }, |
65 | { .arg: "-saveall" , .kind: argFlag, .val: &saveAll, .size: 0, .usage: "save all embedded files" }, |
66 | { .arg: "-o" , .kind: argString, .val: savePath, .size: sizeof(savePath), .usage: "file name for the saved embedded file" }, |
67 | { .arg: "-enc" , .kind: argString, .val: textEncName, .size: sizeof(textEncName), .usage: "output text encoding name" }, |
68 | { .arg: "-opw" , .kind: argString, .val: ownerPassword, .size: sizeof(ownerPassword), .usage: "owner password (for encrypted files)" }, |
69 | { .arg: "-upw" , .kind: argString, .val: userPassword, .size: sizeof(userPassword), .usage: "user password (for encrypted files)" }, |
70 | { .arg: "-v" , .kind: argFlag, .val: &printVersion, .size: 0, .usage: "print copyright and version info" }, |
71 | { .arg: "-h" , .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" }, |
72 | { .arg: "-help" , .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" }, |
73 | { .arg: "--help" , .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" }, |
74 | { .arg: "-?" , .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" }, |
75 | {} }; |
76 | |
77 | int main(int argc, char *argv[]) |
78 | { |
79 | std::unique_ptr<PDFDoc> doc; |
80 | GooString *fileName; |
81 | const UnicodeMap *uMap; |
82 | std::optional<GooString> ownerPW, userPW; |
83 | char uBuf[8]; |
84 | bool ok; |
85 | bool hasSaveFile; |
86 | std::vector<std::unique_ptr<FileSpec>> embeddedFiles; |
87 | int nFiles, nPages, n, i, j; |
88 | Page *page; |
89 | Annots *annots; |
90 | const GooString *s1; |
91 | Unicode u; |
92 | bool isUnicode; |
93 | |
94 | Win32Console win32Console(&argc, &argv); |
95 | |
96 | // parse args |
97 | ok = parseArgs(args: argDesc, argc: &argc, argv); |
98 | hasSaveFile = strlen(s: saveFile) > 0; |
99 | if ((doList ? 1 : 0) + ((saveNum != 0) ? 1 : 0) + ((hasSaveFile != 0) ? 1 : 0) + (saveAll ? 1 : 0) != 1) { |
100 | ok = false; |
101 | } |
102 | if (!ok || argc != 2 || printVersion || printHelp) { |
103 | fprintf(stderr, format: "pdfdetach version %s\n" , PACKAGE_VERSION); |
104 | fprintf(stderr, format: "%s\n" , popplerCopyright); |
105 | fprintf(stderr, format: "%s\n" , xpdfCopyright); |
106 | if (!printVersion) { |
107 | printUsage(program: "pdfdetach" , otherArgs: "<PDF-file>" , args: argDesc); |
108 | } |
109 | return 99; |
110 | } |
111 | fileName = new GooString(argv[1]); |
112 | |
113 | // read config file |
114 | globalParams = std::make_unique<GlobalParams>(); |
115 | if (textEncName[0]) { |
116 | globalParams->setTextEncoding(textEncName); |
117 | } |
118 | |
119 | // get mapping to output encoding |
120 | if (!(uMap = globalParams->getTextEncoding())) { |
121 | error(category: errConfig, pos: -1, msg: "Couldn't get text encoding" ); |
122 | delete fileName; |
123 | return 99; |
124 | } |
125 | |
126 | // open PDF file |
127 | if (ownerPassword[0] != '\001') { |
128 | ownerPW = GooString(ownerPassword); |
129 | } |
130 | if (userPassword[0] != '\001') { |
131 | userPW = GooString(userPassword); |
132 | } |
133 | |
134 | doc = PDFDocFactory().createPDFDoc(uri: *fileName, ownerPassword: ownerPW, userPassword: userPW); |
135 | |
136 | if (!doc->isOk()) { |
137 | return 1; |
138 | } |
139 | |
140 | for (i = 0; i < doc->getCatalog()->numEmbeddedFiles(); ++i) { |
141 | embeddedFiles.push_back(x: doc->getCatalog()->embeddedFile(i)); |
142 | } |
143 | |
144 | nPages = doc->getCatalog()->getNumPages(); |
145 | for (i = 0; i < nPages; ++i) { |
146 | page = doc->getCatalog()->getPage(i: i + 1); |
147 | if (!page) { |
148 | continue; |
149 | } |
150 | annots = page->getAnnots(); |
151 | if (!annots) { |
152 | break; |
153 | } |
154 | |
155 | for (Annot *annot : annots->getAnnots()) { |
156 | if (annot->getType() != Annot::typeFileAttachment) { |
157 | continue; |
158 | } |
159 | embeddedFiles.push_back(x: std::make_unique<FileSpec>(args: static_cast<AnnotFileAttachment *>(annot)->getFile())); |
160 | } |
161 | } |
162 | |
163 | nFiles = embeddedFiles.size(); |
164 | |
165 | // list embedded files |
166 | if (doList) { |
167 | printf(format: "%d embedded files\n" , nFiles); |
168 | for (i = 0; i < nFiles; ++i) { |
169 | const std::unique_ptr<FileSpec> &fileSpec = embeddedFiles[i]; |
170 | printf(format: "%d: " , i + 1); |
171 | s1 = fileSpec->getFileName(); |
172 | if (!s1) { |
173 | return 3; |
174 | } |
175 | if (hasUnicodeByteOrderMark(s: s1->toStr())) { |
176 | isUnicode = true; |
177 | j = 2; |
178 | } else { |
179 | isUnicode = false; |
180 | j = 0; |
181 | } |
182 | while (j < s1->getLength()) { |
183 | if (isUnicode) { |
184 | u = ((s1->getChar(i: j) & 0xff) << 8) | (s1->getChar(i: j + 1) & 0xff); |
185 | j += 2; |
186 | } else { |
187 | u = pdfDocEncoding[s1->getChar(i: j) & 0xff]; |
188 | ++j; |
189 | } |
190 | n = uMap->mapUnicode(u, buf: uBuf, bufSize: sizeof(uBuf)); |
191 | fwrite(ptr: uBuf, size: 1, n: n, stdout); |
192 | } |
193 | fputc(c: '\n', stdout); |
194 | } |
195 | |
196 | // save all embedded files |
197 | } else if (saveAll) { |
198 | std::filesystem::path basePath = savePath; |
199 | if (basePath.empty()) { |
200 | basePath = std::filesystem::current_path(); |
201 | } |
202 | basePath = basePath.lexically_normal(); |
203 | |
204 | for (i = 0; i < nFiles; ++i) { |
205 | const std::unique_ptr<FileSpec> &fileSpec = embeddedFiles[i]; |
206 | std::string filename; |
207 | |
208 | s1 = fileSpec->getFileName(); |
209 | if (!s1) { |
210 | return 3; |
211 | } |
212 | if (hasUnicodeByteOrderMark(s: s1->toStr())) { |
213 | isUnicode = true; |
214 | j = 2; |
215 | } else { |
216 | isUnicode = false; |
217 | j = 0; |
218 | } |
219 | while (j < s1->getLength()) { |
220 | if (isUnicode) { |
221 | u = ((s1->getChar(i: j) & 0xff) << 8) | (s1->getChar(i: j + 1) & 0xff); |
222 | j += 2; |
223 | } else { |
224 | u = pdfDocEncoding[s1->getChar(i: j) & 0xff]; |
225 | ++j; |
226 | } |
227 | n = uMap->mapUnicode(u, buf: uBuf, bufSize: sizeof(uBuf)); |
228 | filename.append(s: uBuf, n: n); |
229 | } |
230 | |
231 | if (filename.empty()) { |
232 | return 3; |
233 | } |
234 | std::filesystem::path filePath = basePath; |
235 | filePath = filePath.append(source: filename).lexically_normal(); |
236 | |
237 | if (!filePath.generic_string().starts_with(x: basePath.generic_string())) { |
238 | error(category: errIO, pos: -1, msg: "Preventing directory traversal" ); |
239 | return 3; |
240 | } |
241 | |
242 | auto *embFile = fileSpec->getEmbeddedFile(); |
243 | if (!embFile || !embFile->isOk()) { |
244 | return 3; |
245 | } |
246 | if (!embFile->save(path: filePath.generic_string())) { |
247 | error(category: errIO, pos: -1, msg: "Error saving embedded file as '{0:s}'" , filePath.c_str()); |
248 | return 2; |
249 | } |
250 | } |
251 | |
252 | // save an embedded file |
253 | } else { |
254 | if (hasSaveFile) { |
255 | for (i = 0; i < nFiles; ++i) { |
256 | const std::unique_ptr<FileSpec> &fileSpec = embeddedFiles[i]; |
257 | s1 = fileSpec->getFileName(); |
258 | if (strcmp(s1: s1->c_str(), s2: saveFile) == 0) { |
259 | saveNum = i + 1; |
260 | break; |
261 | } |
262 | } |
263 | } |
264 | if (saveNum < 1 || saveNum > nFiles) { |
265 | error(category: errCommandLine, pos: -1, msg: hasSaveFile ? "Invalid file name" : "Invalid file number" ); |
266 | return 99; |
267 | } |
268 | |
269 | const std::unique_ptr<FileSpec> &fileSpec = embeddedFiles[saveNum - 1]; |
270 | std::string targetPath = savePath; |
271 | if (targetPath.empty()) { |
272 | // The user hasn't given a path to save, just use the filename specified in the pdf as name |
273 | s1 = fileSpec->getFileName(); |
274 | if (!s1) { |
275 | return 3; |
276 | } |
277 | if (hasUnicodeByteOrderMark(s: s1->toStr())) { |
278 | isUnicode = true; |
279 | j = 2; |
280 | } else { |
281 | isUnicode = false; |
282 | j = 0; |
283 | } |
284 | while (j < s1->getLength()) { |
285 | if (isUnicode) { |
286 | u = ((s1->getChar(i: j) & 0xff) << 8) | (s1->getChar(i: j + 1) & 0xff); |
287 | j += 2; |
288 | } else { |
289 | u = pdfDocEncoding[s1->getChar(i: j) & 0xff]; |
290 | ++j; |
291 | } |
292 | n = uMap->mapUnicode(u, buf: uBuf, bufSize: sizeof(uBuf)); |
293 | targetPath.append(s: uBuf, n: n); |
294 | } |
295 | |
296 | const std::filesystem::path basePath = std::filesystem::current_path().lexically_normal(); |
297 | std::filesystem::path filePath = basePath; |
298 | filePath = filePath.append(source: targetPath).lexically_normal(); |
299 | |
300 | if (!filePath.generic_string().starts_with(x: basePath.generic_string())) { |
301 | error(category: errIO, pos: -1, msg: "Preventing directory traversal" ); |
302 | return 3; |
303 | } |
304 | targetPath = filePath.generic_string(); |
305 | } |
306 | |
307 | auto *embFile = fileSpec->getEmbeddedFile(); |
308 | if (!embFile || !embFile->isOk()) { |
309 | return 3; |
310 | } |
311 | if (!embFile->save(path: targetPath)) { |
312 | error(category: errIO, pos: -1, msg: "Error saving embedded file as '{0:s}'" , targetPath.c_str()); |
313 | return 2; |
314 | } |
315 | } |
316 | |
317 | return 0; |
318 | } |
319 | |