1//========================================================================
2//
3// pdfseparate.cc
4//
5// This file is licensed under the GPLv2 or later
6//
7// Copyright (C) 2011, 2012, 2015 Thomas Freitag <Thomas.Freitag@alfa.de>
8// Copyright (C) 2012-2014, 2017, 2018, 2021, 2022 Albert Astals Cid <aacid@kde.org>
9// Copyright (C) 2013, 2016 Pino Toscano <pino@kde.org>
10// Copyright (C) 2013 Daniel Kahn Gillmor <dkg@fifthhorseman.net>
11// Copyright (C) 2013 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
// Copyright (C) 2017 Léonard Michelet <leonard.michelet@smile.fr>
13// Copyright (C) 2017 Adrian Johnson <ajohnson@redneon.com>
14// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
15// Copyright (C) 2019 Oliver Sander <oliver.sander@tu-dresden.de>
16//
17//========================================================================
18#include "config.h"
19#include <poppler-config.h>
20#include <cstdio>
21#include <cstdlib>
22#include <cstddef>
23#include <cstring>
24#include "parseargs.h"
25#include "goo/GooString.h"
26#include "PDFDoc.h"
27#include "ErrorCodes.h"
28#include "GlobalParams.h"
29#include "Win32Console.h"
30#include <cctype>
31
// Storage for the command-line options; argDesc points at these variables.
// A page bound left at 0 is replaced by extractPages() with the first or
// last page of the document.
static int firstPage { 0 };
static int lastPage { 0 };
static bool printVersion { false };
static bool printHelp { false };
36
37static const ArgDesc argDesc[] = { { .arg: "-f", .kind: argInt, .val: &firstPage, .size: 0, .usage: "first page to extract" },
38 { .arg: "-l", .kind: argInt, .val: &lastPage, .size: 0, .usage: "last page to extract" },
39 { .arg: "-v", .kind: argFlag, .val: &printVersion, .size: 0, .usage: "print copyright and version info" },
40 { .arg: "-h", .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" },
41 { .arg: "-help", .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" },
42 { .arg: "--help", .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" },
43 { .arg: "-?", .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" },
44 {} };
45
46static bool extractPages(const char *srcFileName, const char *destFileName)
47{
48 char pathName[4096];
49 PDFDoc *doc = new PDFDoc(std::make_unique<GooString>(args&: srcFileName));
50
51 if (!doc->isOk()) {
52 error(category: errSyntaxError, pos: -1, msg: "Could not extract page(s) from damaged file ('{0:s}')", srcFileName);
53 delete doc;
54 return false;
55 }
56
57 // destFileName can have multiple %% and one %d
58 // We use auxDestFileName to replace all the valid % appearances
59 // by 'A' (random char that is not %), if at the end of replacing
60 // any of the valid appearances there is still any % around, the
61 // pattern is wrong
62 if (firstPage == 0 && lastPage == 0) {
63 firstPage = 1;
64 lastPage = doc->getNumPages();
65 }
66 if (lastPage == 0) {
67 lastPage = doc->getNumPages();
68 }
69 if (firstPage == 0) {
70 firstPage = 1;
71 }
72 if (lastPage < firstPage) {
73 error(category: errCommandLine, pos: -1, msg: "Wrong page range given: the first page ({0:d}) can not be after the last page ({1:d}).", firstPage, lastPage);
74 delete doc;
75 return false;
76 }
77 bool foundmatch = false;
78 char *auxDestFileName = strdup(s: destFileName);
79 char *p = strstr(haystack: auxDestFileName, needle: "%d");
80 if (p != nullptr) {
81 foundmatch = true;
82 *p = 'A';
83 } else {
84 char pattern[6];
85 for (int i = 2; i < 10; i++) {
86 sprintf(s: pattern, format: "%%0%dd", i);
87 p = strstr(haystack: auxDestFileName, needle: pattern);
88 if (p != nullptr) {
89 foundmatch = true;
90 *p = 'A';
91 break;
92 }
93 }
94 }
95 if (!foundmatch && firstPage != lastPage) {
96 error(category: errSyntaxError, pos: -1, msg: "'{0:s}' must contain '%d' (or any variant respecting printf format) if more than one page should be extracted, in order to print the page number", destFileName);
97 free(ptr: auxDestFileName);
98 delete doc;
99 return false;
100 }
101
102 // at this point auxDestFileName can only contain %%
103 p = strstr(haystack: auxDestFileName, needle: "%%");
104 while (p != nullptr) {
105 *p = 'A';
106 *(p + 1) = 'A';
107 p = strstr(haystack: p, needle: "%%");
108 }
109
110 // at this point any other % is wrong
111 p = strstr(haystack: auxDestFileName, needle: "%");
112 if (p != nullptr) {
113 error(category: errSyntaxError, pos: -1, msg: "'{0:s}' can only contain one '%d' pattern", destFileName);
114 free(ptr: auxDestFileName);
115 delete doc;
116 return false;
117 }
118 free(ptr: auxDestFileName);
119
120 for (int pageNo = firstPage; pageNo <= lastPage; pageNo++) {
121 snprintf(s: pathName, maxlen: sizeof(pathName) - 1, format: destFileName, pageNo);
122 PDFDoc *pagedoc = new PDFDoc(std::make_unique<GooString>(args&: srcFileName));
123 int errCode = pagedoc->savePageAs(name: GooString(pathName), pageNo);
124 if (errCode != errNone) {
125 delete doc;
126 delete pagedoc;
127 return false;
128 }
129 delete pagedoc;
130 }
131 delete doc;
132 return true;
133}
134
// Exit status returned by main() for a bad command line or a failed extraction.
static constexpr int kOtherError { 99 };
136
137int main(int argc, char *argv[])
138{
139 // parse args
140 Win32Console win32console(&argc, &argv);
141 const bool parseOK = parseArgs(args: argDesc, argc: &argc, argv);
142 if (!parseOK || argc != 3 || printVersion || printHelp) {
143 fprintf(stderr, format: "pdfseparate version %s\n", PACKAGE_VERSION);
144 fprintf(stderr, format: "%s\n", popplerCopyright);
145 fprintf(stderr, format: "%s\n", xpdfCopyright);
146 if (!printVersion) {
147 printUsage(program: "pdfseparate", otherArgs: "<PDF-sourcefile> <PDF-pattern-destfile>", args: argDesc);
148 }
149 if (printVersion || printHelp) {
150 return 0;
151 } else {
152 return kOtherError;
153 }
154 }
155 globalParams = std::make_unique<GlobalParams>();
156 const bool extractOK = extractPages(srcFileName: argv[1], destFileName: argv[2]);
157 return extractOK ? 0 : kOtherError;
158}
159

source code of poppler/utils/pdfseparate.cc