//========================================================================
//
// FontInfo.cc
//
// Copyright (C) 2005, 2006 Kristian Høgsberg <krh@redhat.com>
// Copyright (C) 2005-2008, 2010, 2017-2020, 2023 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2005 Brad Hards <bradh@frogmouth.net>
// Copyright (C) 2006 Kouhei Sutou <kou@cozmixng.org>
// Copyright (C) 2009 Pino Toscano <pino@kde.org>
// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2010, 2012 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2010, 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2011 Carlos Garcia Campos <carlosgc@gnome.org>
// Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018, 2019 Adam Reichold <adam.reichold@t-online.de>
// Copyright (C) 2019, 2021, 2022 Oliver Sander <oliver.sander@tu-dresden.de>
// Copyright (C) 2023 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================
24 | |
//========================================================================
//
// Based on code from pdffonts.cc
//
// Copyright 2001-2007 Glyph & Cog, LLC
//
//========================================================================
32 | |
#include "config.h"
#include <cstdio>
#include <cstdlib>
#include <cstddef>
#include <cstring>
#include <cmath>
#include <memory>
#include "GlobalParams.h"
#include "Error.h"
#include "Object.h"
#include "Dict.h"
#include "GfxFont.h"
#include "Annot.h"
#include "PDFDoc.h"
#include "FontInfo.h"
47 | |
48 | FontInfoScanner::FontInfoScanner(PDFDoc *docA, int firstPage) |
49 | { |
50 | doc = docA; |
51 | currentPage = firstPage + 1; |
52 | } |
53 | |
54 | FontInfoScanner::~FontInfoScanner() { } |
55 | |
56 | std::vector<FontInfo *> FontInfoScanner::scan(int nPages) |
57 | { |
58 | Page *page; |
59 | Dict *resDict; |
60 | Annots *annots; |
61 | int lastPage; |
62 | |
63 | std::vector<FontInfo *> result; |
64 | |
65 | if (currentPage > doc->getNumPages()) { |
66 | return result; |
67 | } |
68 | |
69 | lastPage = currentPage + nPages; |
70 | if (lastPage > doc->getNumPages() + 1) { |
71 | lastPage = doc->getNumPages() + 1; |
72 | } |
73 | |
74 | std::unique_ptr<XRef> xrefA(doc->getXRef()->copy()); |
75 | for (int pg = currentPage; pg < lastPage; ++pg) { |
76 | page = doc->getPage(page: pg); |
77 | if (!page) { |
78 | continue; |
79 | } |
80 | |
81 | if ((resDict = page->getResourceDictCopy(xrefA: xrefA.get()))) { |
82 | scanFonts(xrefA: xrefA.get(), resDict, fontsList: &result); |
83 | delete resDict; |
84 | } |
85 | annots = page->getAnnots(); |
86 | for (Annot *annot : annots->getAnnots()) { |
87 | Object obj1 = annot->getAppearanceResDict(); |
88 | if (obj1.isDict()) { |
89 | scanFonts(xrefA: xrefA.get(), resDict: obj1.getDict(), fontsList: &result); |
90 | } |
91 | } |
92 | } |
93 | |
94 | currentPage = lastPage; |
95 | |
96 | return result; |
97 | } |
98 | |
99 | void FontInfoScanner::scanFonts(XRef *xrefA, Dict *resDict, std::vector<FontInfo *> *fontsList) |
100 | { |
101 | GfxFontDict *gfxFontDict; |
102 | |
103 | // scan the fonts in this resource dictionary |
104 | gfxFontDict = nullptr; |
105 | const Object &fontObj = resDict->lookupNF(key: "Font" ); |
106 | if (fontObj.isRef()) { |
107 | Object obj2 = fontObj.fetch(xref: xrefA); |
108 | if (obj2.isDict()) { |
109 | Ref r = fontObj.getRef(); |
110 | gfxFontDict = new GfxFontDict(xrefA, &r, obj2.getDict()); |
111 | } |
112 | } else if (fontObj.isDict()) { |
113 | gfxFontDict = new GfxFontDict(xrefA, nullptr, fontObj.getDict()); |
114 | } |
115 | if (gfxFontDict) { |
116 | for (int i = 0; i < gfxFontDict->getNumFonts(); ++i) { |
117 | if (const std::shared_ptr<GfxFont> &font = gfxFontDict->getFont(i)) { |
118 | Ref fontRef = *font->getID(); |
119 | |
120 | // add this font to the list if not already found |
121 | if (fonts.insert(x: fontRef.num).second) { |
122 | fontsList->push_back(x: new FontInfo(font.get(), xrefA)); |
123 | } |
124 | } |
125 | } |
126 | delete gfxFontDict; |
127 | } |
128 | |
129 | // recursively scan any resource dictionaries in objects in this |
130 | // resource dictionary |
131 | const char *resTypes[] = { "XObject" , "Pattern" }; |
132 | for (const char *resType : resTypes) { |
133 | Ref objDictRef; |
134 | Object objDict = resDict->lookup(key: resType, returnRef: &objDictRef); |
135 | if (!visitedObjects.insert(ref: objDictRef)) { |
136 | continue; |
137 | } |
138 | if (objDict.isDict()) { |
139 | for (int i = 0; i < objDict.dictGetLength(); ++i) { |
140 | Ref obj2Ref; |
141 | const Object obj2 = objDict.getDict()->getVal(i, returnRef: &obj2Ref); |
142 | // check for an already-seen object |
143 | if (!visitedObjects.insert(ref: obj2Ref)) { |
144 | continue; |
145 | } |
146 | |
147 | if (obj2.isStream()) { |
148 | Ref resourcesRef; |
149 | const Object resObj = obj2.streamGetDict()->lookup(key: "Resources" , returnRef: &resourcesRef); |
150 | if (!visitedObjects.insert(ref: resourcesRef)) { |
151 | continue; |
152 | } |
153 | |
154 | if (resObj.isDict() && resObj.getDict() != resDict) { |
155 | scanFonts(xrefA, resDict: resObj.getDict(), fontsList); |
156 | } |
157 | } |
158 | } |
159 | } |
160 | } |
161 | } |
162 | |
163 | FontInfo::FontInfo(GfxFont *font, XRef *xref) |
164 | { |
165 | fontRef = *font->getID(); |
166 | |
167 | // font name |
168 | const std::optional<std::string> &origName = font->getName(); |
169 | if (origName) { |
170 | name = *font->getName(); |
171 | } |
172 | |
173 | // font type |
174 | type = (FontInfo::Type)font->getType(); |
175 | |
176 | // check for an embedded font |
177 | if (font->getType() == fontType3) { |
178 | emb = true; |
179 | embRef = Ref::INVALID(); |
180 | } else { |
181 | emb = font->getEmbeddedFontID(embID: &embRef); |
182 | } |
183 | |
184 | if (!emb) { |
185 | GooString substituteNameAux; |
186 | const std::optional<GfxFontLoc> fontLoc = font->locateFont(xref, ps: nullptr, substituteFontName: &substituteNameAux); |
187 | if (fontLoc && fontLoc->locType == gfxFontLocExternal) { |
188 | file = fontLoc->path; |
189 | } |
190 | if (substituteNameAux.getLength() > 0) { |
191 | substituteName = substituteNameAux.toStr(); |
192 | } |
193 | } |
194 | encoding = font->getEncodingName(); |
195 | |
196 | // look for a ToUnicode map |
197 | hasToUnicode = false; |
198 | Object fontObj = xref->fetch(ref: fontRef); |
199 | if (fontObj.isDict()) { |
200 | hasToUnicode = fontObj.dictLookup(key: "ToUnicode" ).isStream(); |
201 | } |
202 | |
203 | // check for a font subset name: capital letters followed by a '+' |
204 | // sign |
205 | subset = font->isSubset(); |
206 | } |
207 | |