1//========================================================================
2//
3// pdfdetach.cc
4//
5// Copyright 2010 Glyph & Cog, LLC
6//
7//========================================================================
8
9//========================================================================
10//
11// Modified under the Poppler project - http://poppler.freedesktop.org
12//
13// All changes made under the Poppler project to this file are licensed
14// under GPL version 2 or later
15//
16// Copyright (C) 2011 Carlos Garcia Campos <carlosgc@gnome.org>
17// Copyright (C) 2013 Yury G. Kudryashov <urkud.urkud@gmail.com>
18// Copyright (C) 2014, 2017 Adrian Johnson <ajohnson@redneon.com>
19// Copyright (C) 2018, 2020, 2022, 2024 Albert Astals Cid <aacid@kde.org>
20// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
21// Copyright (C) 2019, 2021, 2024 Oliver Sander <oliver.sander@tu-dresden.de>
22// Copyright (C) 2020 <r.coeffier@bee-buzziness.com>
23// Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
24//
25// To see a description of the changes please see the Changelog file that
26// came with your tarball or type make ChangeLog if you are building from git
27//
28//========================================================================
29
30#include "config.h"
31#include <poppler-config.h>
32#include <cstdio>
33#include "goo/gmem.h"
34#include "parseargs.h"
35#include "Annot.h"
36#include "GlobalParams.h"
37#include "Page.h"
38#include "PDFDoc.h"
39#include "PDFDocFactory.h"
40#include "FileSpec.h"
41#include "CharTypes.h"
42#include "Catalog.h"
43#include "UnicodeMap.h"
44#include "PDFDocEncoding.h"
45#include "Error.h"
46#include "UTF.h"
47#include "Win32Console.h"
48
49#include <filesystem>
50
// ---- Command-line option state (filled in by parseArgs via argDesc) ----

// Operating mode: exactly one of these four must be selected.
static bool doList = false; // -list
static int saveNum = 0; // -save <n>; 0 means "not given"
static char saveFile[128] = {}; // -savefile <name>; empty means "not given"
static bool saveAll = false; // -saveall

// Output location (-o) and output text encoding (-enc); empty means "not given".
static char savePath[1024] = {};
static char textEncName[128] = {};

// Passwords. A leading '\001' byte marks "no password supplied", which keeps
// that case distinct from an explicitly supplied empty password.
static char ownerPassword[33] = { '\001' };
static char userPassword[33] = { '\001' };

// Informational flags (-v, -h and its aliases).
static bool printVersion = false;
static bool printHelp = false;
61
62static const ArgDesc argDesc[] = { { .arg: "-list", .kind: argFlag, .val: &doList, .size: 0, .usage: "list all embedded files" },
63 { .arg: "-save", .kind: argInt, .val: &saveNum, .size: 0, .usage: "save the specified embedded file (file number)" },
64 { .arg: "-savefile", .kind: argString, .val: &saveFile, .size: sizeof(saveFile), .usage: "save the specified embedded file (file name)" },
65 { .arg: "-saveall", .kind: argFlag, .val: &saveAll, .size: 0, .usage: "save all embedded files" },
66 { .arg: "-o", .kind: argString, .val: savePath, .size: sizeof(savePath), .usage: "file name for the saved embedded file" },
67 { .arg: "-enc", .kind: argString, .val: textEncName, .size: sizeof(textEncName), .usage: "output text encoding name" },
68 { .arg: "-opw", .kind: argString, .val: ownerPassword, .size: sizeof(ownerPassword), .usage: "owner password (for encrypted files)" },
69 { .arg: "-upw", .kind: argString, .val: userPassword, .size: sizeof(userPassword), .usage: "user password (for encrypted files)" },
70 { .arg: "-v", .kind: argFlag, .val: &printVersion, .size: 0, .usage: "print copyright and version info" },
71 { .arg: "-h", .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" },
72 { .arg: "-help", .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" },
73 { .arg: "--help", .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" },
74 { .arg: "-?", .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" },
75 {} };
76
77int main(int argc, char *argv[])
78{
79 std::unique_ptr<PDFDoc> doc;
80 GooString *fileName;
81 const UnicodeMap *uMap;
82 std::optional<GooString> ownerPW, userPW;
83 char uBuf[8];
84 bool ok;
85 bool hasSaveFile;
86 std::vector<std::unique_ptr<FileSpec>> embeddedFiles;
87 int nFiles, nPages, n, i, j;
88 Page *page;
89 Annots *annots;
90 const GooString *s1;
91 Unicode u;
92 bool isUnicode;
93
94 Win32Console win32Console(&argc, &argv);
95
96 // parse args
97 ok = parseArgs(args: argDesc, argc: &argc, argv);
98 hasSaveFile = strlen(s: saveFile) > 0;
99 if ((doList ? 1 : 0) + ((saveNum != 0) ? 1 : 0) + ((hasSaveFile != 0) ? 1 : 0) + (saveAll ? 1 : 0) != 1) {
100 ok = false;
101 }
102 if (!ok || argc != 2 || printVersion || printHelp) {
103 fprintf(stderr, format: "pdfdetach version %s\n", PACKAGE_VERSION);
104 fprintf(stderr, format: "%s\n", popplerCopyright);
105 fprintf(stderr, format: "%s\n", xpdfCopyright);
106 if (!printVersion) {
107 printUsage(program: "pdfdetach", otherArgs: "<PDF-file>", args: argDesc);
108 }
109 return 99;
110 }
111 fileName = new GooString(argv[1]);
112
113 // read config file
114 globalParams = std::make_unique<GlobalParams>();
115 if (textEncName[0]) {
116 globalParams->setTextEncoding(textEncName);
117 }
118
119 // get mapping to output encoding
120 if (!(uMap = globalParams->getTextEncoding())) {
121 error(category: errConfig, pos: -1, msg: "Couldn't get text encoding");
122 delete fileName;
123 return 99;
124 }
125
126 // open PDF file
127 if (ownerPassword[0] != '\001') {
128 ownerPW = GooString(ownerPassword);
129 }
130 if (userPassword[0] != '\001') {
131 userPW = GooString(userPassword);
132 }
133
134 doc = PDFDocFactory().createPDFDoc(uri: *fileName, ownerPassword: ownerPW, userPassword: userPW);
135
136 if (!doc->isOk()) {
137 return 1;
138 }
139
140 for (i = 0; i < doc->getCatalog()->numEmbeddedFiles(); ++i) {
141 embeddedFiles.push_back(x: doc->getCatalog()->embeddedFile(i));
142 }
143
144 nPages = doc->getCatalog()->getNumPages();
145 for (i = 0; i < nPages; ++i) {
146 page = doc->getCatalog()->getPage(i: i + 1);
147 if (!page) {
148 continue;
149 }
150 annots = page->getAnnots();
151 if (!annots) {
152 break;
153 }
154
155 for (Annot *annot : annots->getAnnots()) {
156 if (annot->getType() != Annot::typeFileAttachment) {
157 continue;
158 }
159 embeddedFiles.push_back(x: std::make_unique<FileSpec>(args: static_cast<AnnotFileAttachment *>(annot)->getFile()));
160 }
161 }
162
163 nFiles = embeddedFiles.size();
164
165 // list embedded files
166 if (doList) {
167 printf(format: "%d embedded files\n", nFiles);
168 for (i = 0; i < nFiles; ++i) {
169 const std::unique_ptr<FileSpec> &fileSpec = embeddedFiles[i];
170 printf(format: "%d: ", i + 1);
171 s1 = fileSpec->getFileName();
172 if (!s1) {
173 return 3;
174 }
175 if (hasUnicodeByteOrderMark(s: s1->toStr())) {
176 isUnicode = true;
177 j = 2;
178 } else {
179 isUnicode = false;
180 j = 0;
181 }
182 while (j < s1->getLength()) {
183 if (isUnicode) {
184 u = ((s1->getChar(i: j) & 0xff) << 8) | (s1->getChar(i: j + 1) & 0xff);
185 j += 2;
186 } else {
187 u = pdfDocEncoding[s1->getChar(i: j) & 0xff];
188 ++j;
189 }
190 n = uMap->mapUnicode(u, buf: uBuf, bufSize: sizeof(uBuf));
191 fwrite(ptr: uBuf, size: 1, n: n, stdout);
192 }
193 fputc(c: '\n', stdout);
194 }
195
196 // save all embedded files
197 } else if (saveAll) {
198 std::filesystem::path basePath = savePath;
199 if (basePath.empty()) {
200 basePath = std::filesystem::current_path();
201 }
202 basePath = basePath.lexically_normal();
203
204 for (i = 0; i < nFiles; ++i) {
205 const std::unique_ptr<FileSpec> &fileSpec = embeddedFiles[i];
206 std::string filename;
207
208 s1 = fileSpec->getFileName();
209 if (!s1) {
210 return 3;
211 }
212 if (hasUnicodeByteOrderMark(s: s1->toStr())) {
213 isUnicode = true;
214 j = 2;
215 } else {
216 isUnicode = false;
217 j = 0;
218 }
219 while (j < s1->getLength()) {
220 if (isUnicode) {
221 u = ((s1->getChar(i: j) & 0xff) << 8) | (s1->getChar(i: j + 1) & 0xff);
222 j += 2;
223 } else {
224 u = pdfDocEncoding[s1->getChar(i: j) & 0xff];
225 ++j;
226 }
227 n = uMap->mapUnicode(u, buf: uBuf, bufSize: sizeof(uBuf));
228 filename.append(s: uBuf, n: n);
229 }
230
231 if (filename.empty()) {
232 return 3;
233 }
234 std::filesystem::path filePath = basePath;
235 filePath = filePath.append(source: filename).lexically_normal();
236
237 if (!filePath.generic_string().starts_with(x: basePath.generic_string())) {
238 error(category: errIO, pos: -1, msg: "Preventing directory traversal");
239 return 3;
240 }
241
242 auto *embFile = fileSpec->getEmbeddedFile();
243 if (!embFile || !embFile->isOk()) {
244 return 3;
245 }
246 if (!embFile->save(path: filePath.generic_string())) {
247 error(category: errIO, pos: -1, msg: "Error saving embedded file as '{0:s}'", filePath.c_str());
248 return 2;
249 }
250 }
251
252 // save an embedded file
253 } else {
254 if (hasSaveFile) {
255 for (i = 0; i < nFiles; ++i) {
256 const std::unique_ptr<FileSpec> &fileSpec = embeddedFiles[i];
257 s1 = fileSpec->getFileName();
258 if (strcmp(s1: s1->c_str(), s2: saveFile) == 0) {
259 saveNum = i + 1;
260 break;
261 }
262 }
263 }
264 if (saveNum < 1 || saveNum > nFiles) {
265 error(category: errCommandLine, pos: -1, msg: hasSaveFile ? "Invalid file name" : "Invalid file number");
266 return 99;
267 }
268
269 const std::unique_ptr<FileSpec> &fileSpec = embeddedFiles[saveNum - 1];
270 std::string targetPath = savePath;
271 if (targetPath.empty()) {
272 // The user hasn't given a path to save, just use the filename specified in the pdf as name
273 s1 = fileSpec->getFileName();
274 if (!s1) {
275 return 3;
276 }
277 if (hasUnicodeByteOrderMark(s: s1->toStr())) {
278 isUnicode = true;
279 j = 2;
280 } else {
281 isUnicode = false;
282 j = 0;
283 }
284 while (j < s1->getLength()) {
285 if (isUnicode) {
286 u = ((s1->getChar(i: j) & 0xff) << 8) | (s1->getChar(i: j + 1) & 0xff);
287 j += 2;
288 } else {
289 u = pdfDocEncoding[s1->getChar(i: j) & 0xff];
290 ++j;
291 }
292 n = uMap->mapUnicode(u, buf: uBuf, bufSize: sizeof(uBuf));
293 targetPath.append(s: uBuf, n: n);
294 }
295
296 const std::filesystem::path basePath = std::filesystem::current_path().lexically_normal();
297 std::filesystem::path filePath = basePath;
298 filePath = filePath.append(source: targetPath).lexically_normal();
299
300 if (!filePath.generic_string().starts_with(x: basePath.generic_string())) {
301 error(category: errIO, pos: -1, msg: "Preventing directory traversal");
302 return 3;
303 }
304 targetPath = filePath.generic_string();
305 }
306
307 auto *embFile = fileSpec->getEmbeddedFile();
308 if (!embFile || !embFile->isOk()) {
309 return 3;
310 }
311 if (!embFile->save(path: targetPath)) {
312 error(category: errIO, pos: -1, msg: "Error saving embedded file as '{0:s}'", targetPath.c_str());
313 return 2;
314 }
315 }
316
317 return 0;
318}
319

// source code of poppler/utils/pdfdetach.cc