1 | #![allow (rustc::default_hash_types, rustc::potential_query_instability)] |
2 | |
3 | // Derived from code in LLVM, which is: |
4 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
5 | // See https://llvm.org/LICENSE.txt for license information. |
6 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
7 | |
8 | use std::collections::{BTreeMap, HashMap}; |
9 | use std::io::{self, Cursor, Seek, Write}; |
10 | use std::mem::size_of; |
11 | |
12 | use crate::alignment::*; |
13 | use crate::archive::*; |
14 | use crate::coff_import_file; |
15 | use crate::math_extras::align_to_power_of2; |
16 | use crate::ObjectReader; |
17 | |
18 | const BIG_AR_MEM_HDR_SIZE: u64 = { |
19 | // `try_into` is not const, so check the size manually. |
20 | assert!( |
21 | std::mem::size_of::<usize>() <= std::mem::size_of::<u64>() |
22 | || std::mem::size_of::<big_archive::BigArMemHdrType>() < u64::MAX as usize |
23 | ); |
24 | std::mem::size_of::<big_archive::BigArMemHdrType>() as u64 |
25 | }; |
26 | |
/// Symbol-name to member-index tables that `write_symbols` fills in.
///
/// Keys are raw symbol names; values are the 16-bit index of the member that
/// defined the symbol first (later duplicates are ignored by `write_symbols`).
#[derive(Default)]
struct SymMap {
    /// When set, symbols of members for which the reader's
    /// `is_ec_object_file` returns true are recorded in `ec_map` rather than
    /// `map`.
    use_ec_map: bool,
    /// Regular symbol map.
    map: BTreeMap<Box<[u8]>, u16>,
    /// Symbol map for EC objects, plus import descriptors copied over from
    /// `map` when `use_ec_map` is set.
    ec_map: BTreeMap<Box<[u8]>, u16>,
}
33 | |
34 | pub struct NewArchiveMember<'a> { |
35 | pub buf: Box<dyn AsRef<[u8]> + 'a>, |
36 | pub object_reader: &'static ObjectReader, |
37 | pub member_name: String, |
38 | pub mtime: u64, |
39 | pub uid: u32, |
40 | pub gid: u32, |
41 | pub perms: u32, |
42 | } |
43 | |
44 | impl<'a> NewArchiveMember<'a> { |
45 | pub fn new<T: AsRef<[u8]> + 'a>( |
46 | buf: T, |
47 | object_reader: &'static ObjectReader, |
48 | member_name: String, |
49 | ) -> Self { |
50 | Self { |
51 | buf: Box::new(buf), |
52 | object_reader, |
53 | member_name, |
54 | mtime: 0, |
55 | uid: 0, |
56 | gid: 0, |
57 | perms: 0o644, |
58 | } |
59 | } |
60 | } |
61 | |
62 | fn is_darwin(kind: ArchiveKind) -> bool { |
63 | matches!(kind, ArchiveKind::Darwin | ArchiveKind::Darwin64) |
64 | } |
65 | |
66 | fn is_aix_big_archive(kind: ArchiveKind) -> bool { |
67 | kind == ArchiveKind::AixBig |
68 | } |
69 | |
70 | fn is_coff_archive(kind: ArchiveKind) -> bool { |
71 | kind == ArchiveKind::Coff |
72 | } |
73 | |
74 | fn is_bsd_like(kind: ArchiveKind) -> bool { |
75 | match kind { |
76 | ArchiveKind::Gnu | ArchiveKind::Gnu64 | ArchiveKind::AixBig | ArchiveKind::Coff => false, |
77 | ArchiveKind::Bsd | ArchiveKind::Darwin | ArchiveKind::Darwin64 => true, |
78 | } |
79 | } |
80 | |
/// Writes the fixed-width tail of a classic `ar` member header: everything
/// after the 16-byte name field.
///
/// Layout (44 bytes): mtime (12), uid (6), gid (6), octal mode (8), size (10),
/// followed by the two-byte terminator "`\n". Every field is left-aligned and
/// space-padded. Together with a 16-byte name field this yields the 60-byte
/// header that `print_bsd_member_header` assumes.
///
/// Returns any error reported by the writer.
fn print_rest_of_member_header<W: Write>(
    w: &mut W,
    mtime: u64,
    uid: u32,
    gid: u32,
    perms: u32,
    size: u64,
) -> io::Result<()> {
    // The format has only 6 chars for uid and gid. Truncate if the provided
    // values don't fit.
    write!(
        w,
        "{:<12}{:<6}{:<6}{:<8o}{:<10}`\n",
        mtime,
        uid % 1000000,
        gid % 1000000,
        perms,
        size
    )
}
101 | |
102 | fn print_gnu_small_member_header<W: Write>( |
103 | w: &mut W, |
104 | name: String, |
105 | mtime: u64, |
106 | uid: u32, |
107 | gid: u32, |
108 | perms: u32, |
109 | size: u64, |
110 | ) -> io::Result<()> { |
111 | write!(w, " {:<16}" , name + "/" )?; |
112 | print_rest_of_member_header(w, mtime, uid, gid, perms, size) |
113 | } |
114 | |
115 | fn print_bsd_member_header<W: Write>( |
116 | w: &mut W, |
117 | pos: u64, |
118 | name: &str, |
119 | mtime: u64, |
120 | uid: u32, |
121 | gid: u32, |
122 | perms: u32, |
123 | size: u64, |
124 | ) -> io::Result<()> { |
125 | let pos_after_header: u64 = pos + 60 + u64::try_from(name.len()).unwrap(); |
126 | // Pad so that even 64 bit object files are aligned. |
127 | let pad: u64 = offset_to_alignment(value:pos_after_header, alignment:8); |
128 | let name_with_padding: u64 = u64::try_from(name.len()).unwrap() + pad; |
129 | write!(w, "#1/ {:<13}" , name_with_padding)?; |
130 | print_rest_of_member_header(w, mtime, uid, gid, perms, size:name_with_padding + size)?; |
131 | write!(w, " {}" , name)?; |
132 | write!( |
133 | w, |
134 | " {nil:\0<pad$}" , |
135 | nil = "" , |
136 | pad = usize::try_from(pad).unwrap() |
137 | ) |
138 | } |
139 | |
/// Writes an AIX big-archive member header.
///
/// Fixed part (112 bytes, all fields left-aligned and space-padded): size
/// (20), next member offset (20), previous member offset (20), mtime (12),
/// uid (12), gid (12), octal mode (12), name length (4). It is followed by the
/// name, a NUL byte if the name length is odd, and the terminator "`\n".
fn print_big_archive_member_header<W: Write>(
    w: &mut W,
    name: &str,
    mtime: u64,
    uid: u32,
    gid: u32,
    perms: u32,
    size: u64,
    prev_offset: u64,
    next_offset: u64,
) -> io::Result<()> {
    write!(
        w,
        "{:<20}{:<20}{:<20}{:<12}{:<12}{:<12}{:<12o}{:<4}",
        size,
        next_offset,
        prev_offset,
        mtime,
        u64::from(uid) % 1000000000000u64,
        u64::from(gid) % 1000000000000u64,
        perms,
        name.len(),
    )?;

    if !name.is_empty() {
        write!(w, "{}", name)?;

        // Keep the terminator on an even offset relative to the name start.
        if name.len() % 2 != 0 {
            write!(w, "\0")?;
        }
    }

    write!(w, "`\n")?;

    Ok(())
}
176 | |
/// Decides whether a member name must be stored in the long-name string table
/// instead of inline in the member header.
///
/// That is the case for every member of a thin archive, for names of 16 bytes
/// or more, and for names containing a `/`.
fn use_string_table(thin: bool, name: &str) -> bool {
    if thin {
        return true;
    }
    // '/' is ASCII, so a byte scan is equivalent to a char search.
    name.len() > 15 || name.bytes().any(|b| b == b'/')
}
180 | |
181 | fn is_64bit_kind(kind: ArchiveKind) -> bool { |
182 | match kind { |
183 | ArchiveKind::Gnu | ArchiveKind::Bsd | ArchiveKind::Darwin | ArchiveKind::Coff => false, |
184 | ArchiveKind::AixBig | ArchiveKind::Darwin64 | ArchiveKind::Gnu64 => true, |
185 | } |
186 | } |
187 | |
188 | fn print_member_header<'m, W: Write, T: Write + Seek>( |
189 | w: &mut W, |
190 | pos: u64, |
191 | string_table: &mut T, |
192 | member_names: &mut HashMap<&'m str, u64>, |
193 | kind: ArchiveKind, |
194 | thin: bool, |
195 | m: &'m NewArchiveMember<'m>, |
196 | mtime: u64, |
197 | size: u64, |
198 | ) -> io::Result<()> { |
199 | if is_bsd_like(kind) { |
200 | return print_bsd_member_header(w, pos, &m.member_name, mtime, m.uid, m.gid, m.perms, size); |
201 | } |
202 | |
203 | if !use_string_table(thin, &m.member_name) { |
204 | return print_gnu_small_member_header( |
205 | w, |
206 | m.member_name.clone(), |
207 | mtime, |
208 | m.uid, |
209 | m.gid, |
210 | m.perms, |
211 | size, |
212 | ); |
213 | } |
214 | |
215 | write!(w, "/" )?; |
216 | let name_pos; |
217 | if thin { |
218 | name_pos = string_table.stream_position()?; |
219 | write!(string_table, " {}/ \n" , m.member_name)?; |
220 | } else if let Some(&pos) = member_names.get(&*m.member_name) { |
221 | name_pos = pos; |
222 | } else { |
223 | name_pos = string_table.stream_position()?; |
224 | member_names.insert(&m.member_name, name_pos); |
225 | write!(string_table, " {}" , m.member_name)?; |
226 | if is_coff_archive(kind) { |
227 | write!(string_table, " \0" )?; |
228 | } else { |
229 | write!(string_table, "/ \n" )?; |
230 | } |
231 | } |
232 | write!(w, " {:<15}" , name_pos)?; |
233 | print_rest_of_member_header(w, mtime, m.uid, m.gid, m.perms, size) |
234 | } |
235 | |
236 | struct MemberData<'a> { |
237 | symbols: Vec<u64>, |
238 | header: Vec<u8>, |
239 | data: &'a [u8], |
240 | padding: &'static [u8], |
241 | pre_head_pad_size: u64, |
242 | object_reader: &'static ObjectReader, |
243 | } |
244 | |
245 | fn compute_string_table(names: &[u8]) -> MemberData<'_> { |
246 | let size: u64 = u64::try_from(names.len()).unwrap(); |
247 | let pad: u64 = offset_to_alignment(value:size, alignment:2); |
248 | let mut header: Vec = Vec::new(); |
249 | write!(header, " {:<48}" , "//" ).unwrap(); |
250 | write!(header, " {:<10}" , size + pad).unwrap(); |
251 | write!(header, "` \n" ).unwrap(); |
252 | MemberData { |
253 | symbols: vec![], |
254 | header, |
255 | data: names, |
256 | padding: if pad != 0 { b" \n" } else { b"" }, |
257 | pre_head_pad_size: 0, |
258 | object_reader: &crate::DEFAULT_OBJECT_READER, |
259 | } |
260 | } |
261 | |
/// Timestamp used for the members this writer synthesizes (symbol tables and
/// the like). It is always 0, independent of the wall clock.
const fn now() -> u64 {
    const FIXED_TIMESTAMP: u64 = 0;
    FIXED_TIMESTAMP
}
265 | |
266 | // NOTE: isArchiveSymbol was moved to object_reader.rs |
267 | |
268 | fn print_n_bits<W: Write>(w: &mut W, kind: ArchiveKind, val: u64) -> io::Result<()> { |
269 | if is_64bit_kind(kind) { |
270 | w.write_all(&if is_bsd_like(kind) { |
271 | u64::to_le_bytes(self:val) |
272 | } else { |
273 | u64::to_be_bytes(self:val) |
274 | }) |
275 | } else { |
276 | w.write_all(&if is_bsd_like(kind) { |
277 | u32::to_le_bytes(self:u32::try_from(val).unwrap()) |
278 | } else { |
279 | u32::to_be_bytes(self:u32::try_from(val).unwrap()) |
280 | }) |
281 | } |
282 | } |
283 | |
284 | fn compute_symbol_table_size_and_pad( |
285 | kind: ArchiveKind, |
286 | num_syms: u64, |
287 | offset_size: u64, |
288 | string_table_size: u64, |
289 | ) -> (u64, u64) { |
290 | assert!( |
291 | offset_size == 4 || offset_size == 8, |
292 | "Unsupported offset_size" |
293 | ); |
294 | let mut size = offset_size; // Number of entries |
295 | if is_bsd_like(kind) { |
296 | size += num_syms * offset_size * 2; // Table |
297 | } else { |
298 | size += num_syms * offset_size; // Table |
299 | } |
300 | if is_bsd_like(kind) { |
301 | size += offset_size; // byte count; |
302 | } |
303 | size += string_table_size; |
304 | // ld64 expects the members to be 8-byte aligned for 64-bit content and at |
305 | // least 4-byte aligned for 32-bit content. Opt for the larger encoding |
306 | // uniformly. |
307 | // We do this for all bsd formats because it simplifies aligning members. |
308 | let pad = if is_aix_big_archive(kind) { |
309 | 0 |
310 | } else { |
311 | offset_to_alignment(size, if is_bsd_like(kind) { 8 } else { 2 }) |
312 | }; |
313 | size += pad; |
314 | (size, pad) |
315 | } |
316 | |
317 | fn compute_symbol_map_size_and_pad(num_obj: usize, sym_map: &SymMap) -> (u64, u64) { |
318 | let mut size: usize = size_of::<u32>() * 2; // Number of symbols and objects entries |
319 | size += num_obj * size_of::<u32>(); // Offset table |
320 | |
321 | for name: &Box<[u8]> in sym_map.map.keys() { |
322 | size += size_of::<u16>() + name.len() + 1; |
323 | } |
324 | |
325 | let mut size: u64 = u64::try_from(size).unwrap(); |
326 | let pad: u64 = offset_to_alignment(value:size, alignment:2); |
327 | size += pad; |
328 | (size, pad) |
329 | } |
330 | |
331 | fn compute_ec_symbols_size_and_pad(sym_map: &SymMap) -> (u64, u64) { |
332 | let mut size: usize = size_of::<u32>(); // Number of symbols |
333 | |
334 | for name: &Box<[u8]> in sym_map.ec_map.keys() { |
335 | size += size_of::<u16>() + name.len() + 1; |
336 | } |
337 | |
338 | let mut size: u64 = u64::try_from(size).unwrap(); |
339 | let pad: u64 = offset_to_alignment(value:size, alignment:2); |
340 | size += pad; |
341 | (size, pad) |
342 | } |
343 | |
344 | fn write_symbol_table_header<W: Write + Seek>( |
345 | w: &mut W, |
346 | kind: ArchiveKind, |
347 | size: u64, |
348 | prev_member_offset: u64, |
349 | next_member_offset: u64, |
350 | ) -> io::Result<()> { |
351 | if is_bsd_like(kind) { |
352 | let name = if is_64bit_kind(kind) { |
353 | "__.SYMDEF_64" |
354 | } else { |
355 | "__.SYMDEF" |
356 | }; |
357 | let pos = w.stream_position()?; |
358 | print_bsd_member_header(w, pos, name, now(), 0, 0, 0, size) |
359 | } else if is_aix_big_archive(kind) { |
360 | print_big_archive_member_header( |
361 | w, |
362 | "" , |
363 | now(), |
364 | 0, |
365 | 0, |
366 | 0, |
367 | size, |
368 | prev_member_offset, |
369 | next_member_offset, |
370 | ) |
371 | } else { |
372 | let name = if is_64bit_kind(kind) { "/SYM64" } else { "" }; |
373 | print_gnu_small_member_header(w, name.to_string(), now(), 0, 0, 0, size) |
374 | } |
375 | } |
376 | |
377 | fn compute_headers_size( |
378 | kind: ArchiveKind, |
379 | num_members: usize, |
380 | string_member_size: u64, |
381 | num_syms: u64, |
382 | sym_names_size: u64, |
383 | sym_map: Option<&SymMap>, |
384 | ) -> io::Result<u64> { |
385 | let offset_size: u64 = if is_64bit_kind(kind) { 8 } else { 4 }; |
386 | let (symtab_size: u64, _) = |
387 | compute_symbol_table_size_and_pad(kind, num_syms, offset_size, string_table_size:sym_names_size); |
388 | let compute_symbol_table_header_size: impl Fn() -> Result = || -> io::Result<u64> { |
389 | let mut tmp: Cursor> = Cursor::new(inner:Vec::new()); |
390 | write_symbol_table_header(&mut tmp, kind, symtab_size, prev_member_offset:0, next_member_offset:0)?; |
391 | Ok(tmp.into_inner().len().try_into().unwrap()) |
392 | }; |
393 | let header_size: u64 = compute_symbol_table_header_size()?; |
394 | let mut size: u64 = u64::try_from("!<arch> \n" .len()).unwrap() + header_size + symtab_size; |
395 | |
396 | if let Some(sym_map: &SymMap) = sym_map { |
397 | size += header_size + compute_symbol_map_size_and_pad(num_obj:num_members, sym_map).0; |
398 | if !sym_map.ec_map.is_empty() { |
399 | size += header_size + compute_ec_symbols_size_and_pad(sym_map).0; |
400 | } |
401 | } |
402 | |
403 | Ok(size + string_member_size) |
404 | } |
405 | |
406 | // NOTE: is64BitSymbolicFile, getAuxMaxAlignment and getMemberAlignment were |
407 | // moved to object_reader.rs |
408 | |
409 | fn write_symbol_table<W: Write + Seek>( |
410 | w: &mut W, |
411 | kind: ArchiveKind, |
412 | members: &[MemberData<'_>], |
413 | string_table: &[u8], |
414 | members_offset: u64, |
415 | num_syms: u64, |
416 | prev_member_offset: u64, |
417 | next_member_offset: u64, |
418 | is_64_bit: bool, |
419 | ) -> io::Result<()> { |
420 | // We don't write a symbol table on an archive with no members -- except on |
421 | // Darwin, where the linker will abort unless the archive has a symbol table. |
422 | if string_table.is_empty() && !is_darwin(kind) && !is_coff_archive(kind) { |
423 | return Ok(()); |
424 | } |
425 | |
426 | let offset_size = if is_64bit_kind(kind) { 8 } else { 4 }; |
427 | let (size, pad) = compute_symbol_table_size_and_pad( |
428 | kind, |
429 | num_syms, |
430 | offset_size, |
431 | string_table.len().try_into().unwrap(), |
432 | ); |
433 | write_symbol_table_header(w, kind, size, prev_member_offset, next_member_offset)?; |
434 | |
435 | if is_bsd_like(kind) { |
436 | print_n_bits(w, kind, num_syms * 2 * offset_size)?; |
437 | } else { |
438 | print_n_bits(w, kind, num_syms)?; |
439 | } |
440 | |
441 | let mut pos = members_offset; |
442 | for m in members { |
443 | if is_aix_big_archive(kind) { |
444 | pos += m.pre_head_pad_size; |
445 | if (m.object_reader.is_64_bit_object_file)(m.data) != is_64_bit { |
446 | pos += u64::try_from(m.header.len() + m.data.len() + m.padding.len()).unwrap(); |
447 | continue; |
448 | } |
449 | } |
450 | |
451 | for &string_offset in &m.symbols { |
452 | if is_bsd_like(kind) { |
453 | print_n_bits(w, kind, string_offset)?; |
454 | } |
455 | print_n_bits(w, kind, pos)?; // member offset |
456 | } |
457 | pos += u64::try_from(m.header.len() + m.data.len() + m.padding.len()).unwrap(); |
458 | } |
459 | |
460 | if is_bsd_like(kind) { |
461 | // byte count of the string table |
462 | print_n_bits(w, kind, u64::try_from(string_table.len()).unwrap())?; |
463 | } |
464 | |
465 | w.write_all(string_table)?; |
466 | |
467 | write!( |
468 | w, |
469 | " {nil:\0<pad$}" , |
470 | nil = "" , |
471 | pad = usize::try_from(pad).unwrap() |
472 | ) |
473 | } |
474 | |
475 | fn write_symbol_map<W: Write + Seek>( |
476 | w: &mut W, |
477 | kind: ArchiveKind, |
478 | members: &[MemberData<'_>], |
479 | sym_map: &SymMap, |
480 | members_offset: u64, |
481 | ) -> io::Result<()> { |
482 | let (size, pad) = compute_symbol_map_size_and_pad(members.len(), sym_map); |
483 | write_symbol_table_header(w, kind, size, 0, 0)?; |
484 | |
485 | let mut pos: u32 = members_offset.try_into().unwrap(); |
486 | |
487 | w.write_all(&u32::try_from(members.len()).unwrap().to_le_bytes())?; |
488 | for m in members { |
489 | w.write_all(&pos.to_le_bytes())?; // member offset |
490 | pos = pos |
491 | .checked_add( |
492 | (m.header.len() + m.data.len() + m.padding.len()) |
493 | .try_into() |
494 | .unwrap(), |
495 | ) |
496 | .unwrap(); |
497 | } |
498 | |
499 | w.write_all(&u32::try_from(sym_map.map.len()).unwrap().to_le_bytes())?; |
500 | |
501 | for s in sym_map.map.values() { |
502 | w.write_all(&s.to_le_bytes())?; |
503 | } |
504 | for s in sym_map.map.keys() { |
505 | w.write_all(s)?; |
506 | w.write_all(&[0])?; |
507 | } |
508 | |
509 | write!( |
510 | w, |
511 | " {nil:\0<pad$}" , |
512 | nil = "" , |
513 | pad = usize::try_from(pad).unwrap() |
514 | )?; |
515 | Ok(()) |
516 | } |
517 | |
518 | fn write_ec_symbols<W: Write + Seek>(w: &mut W, sym_map: &SymMap) -> io::Result<()> { |
519 | let (size: u64, pad: u64) = compute_ec_symbols_size_and_pad(sym_map); |
520 | print_gnu_small_member_header(w, name:"/<ECSYMBOLS>" .to_string(), mtime:now(), uid:0, gid:0, perms:0, size)?; |
521 | |
522 | w.write_all(&u32::try_from(sym_map.ec_map.len()).unwrap().to_le_bytes())?; |
523 | |
524 | for s: &u16 in sym_map.ec_map.values() { |
525 | w.write_all(&s.to_le_bytes())?; |
526 | } |
527 | for s: &Box<[u8]> in sym_map.ec_map.keys() { |
528 | w.write_all(buf:s)?; |
529 | w.write_all(&[0])?; |
530 | } |
531 | |
532 | write!( |
533 | w, |
534 | " {nil:\0<pad$}" , |
535 | nil = "" , |
536 | pad = usize::try_from(pad).unwrap() |
537 | )?; |
538 | Ok(()) |
539 | } |
540 | |
541 | // NOTE: isECObject was moved to object_reader.rs |
542 | |
543 | fn is_import_descriptor(name: &[u8]) -> bool { |
544 | name.starts_with(needle:coff_import_file::IMPORT_DESCRIPTOR_PREFIX) |
545 | || name.starts_with(needle:coff_import_file::NULL_IMPORT_DESCRIPTOR_SYMBOL_NAME) |
546 | || (name.starts_with(needle:coff_import_file::NULL_THUNK_DATA_PREFIX) |
547 | && name.ends_with(needle:coff_import_file::NULL_THUNK_DATA_SUFFIX)) |
548 | } |
549 | |
550 | // NOTE: LLVM calls this getSymbols and has the get_native_object_symbols |
551 | // function (moved to object_reader.rs) inlined. |
552 | fn write_symbols( |
553 | obj: &[u8], |
554 | index: u16, |
555 | sym_names: &mut Cursor<Vec<u8>>, |
556 | sym_map: &mut Option<&mut SymMap>, |
557 | object_reader: &ObjectReader, |
558 | ) -> io::Result<Vec<u64>> { |
559 | let mut ret = vec![]; |
560 | |
561 | let mut is_using_map = false; |
562 | let (mut map, mut ec_map) = if let Some(sym_map) = sym_map { |
563 | if sym_map.use_ec_map && (object_reader.is_ec_object_file)(obj) { |
564 | (Some(&mut sym_map.ec_map), None) |
565 | } else { |
566 | is_using_map = true; |
567 | ( |
568 | Some(&mut sym_map.map), |
569 | sym_map.use_ec_map.then_some(&mut sym_map.ec_map), |
570 | ) |
571 | } |
572 | } else { |
573 | (None, None) |
574 | }; |
575 | |
576 | (object_reader.get_symbols)(obj, &mut |name| { |
577 | if let Some(map) = &mut map { |
578 | let entry = map.entry(name.to_vec().into_boxed_slice()); |
579 | if matches!(entry, std::collections::btree_map::Entry::Occupied(_)) { |
580 | return Ok(()); // ignore duplicated symbol |
581 | } |
582 | entry.or_insert(index); |
583 | |
584 | if is_using_map { |
585 | ret.push(sym_names.stream_position()?); |
586 | sym_names.write_all(name)?; |
587 | sym_names.write_all(&[0])?; |
588 | |
589 | // If EC is enabled, then the import descriptors are NOT put into EC |
590 | // objects so we need to copy them to the EC map manually. |
591 | if let Some(ec_map) = &mut ec_map { |
592 | if is_import_descriptor(name) { |
593 | ec_map.insert(name.to_vec().into_boxed_slice(), index); |
594 | } |
595 | } |
596 | } |
597 | } else { |
598 | ret.push(sym_names.stream_position()?); |
599 | sym_names.write_all(name)?; |
600 | sym_names.write_all(&[0])?; |
601 | } |
602 | Ok(()) |
603 | })?; |
604 | Ok(ret) |
605 | } |
606 | |
607 | fn compute_member_data<'a, S: Write + Seek>( |
608 | string_table: &mut S, |
609 | sym_names: &mut Cursor<Vec<u8>>, |
610 | kind: ArchiveKind, |
611 | thin: bool, |
612 | sym_map: &mut Option<&mut SymMap>, |
613 | new_members: &'a [NewArchiveMember<'a>], |
614 | ) -> io::Result<Vec<MemberData<'a>>> { |
615 | const PADDING_DATA: &[u8; 8] = &[b' \n' ; 8]; |
616 | |
617 | let mut mem_head_pad_size = 0; |
618 | // This ignores the symbol table, but we only need the value mod 8 and the |
619 | // symbol table is aligned to be a multiple of 8 bytes |
620 | let mut pos = if is_aix_big_archive(kind) { |
621 | u64::try_from(std::mem::size_of::<big_archive::FixLenHdr>()).unwrap() |
622 | } else { |
623 | 0 |
624 | }; |
625 | |
626 | let mut ret = vec![]; |
627 | let mut has_object = false; |
628 | |
629 | // Deduplicate long member names in the string table and reuse earlier name |
630 | // offsets. This especially saves space for COFF Import libraries where all |
631 | // members have the same name. |
632 | let mut member_names = HashMap::<&str, u64>::new(); |
633 | |
634 | // UniqueTimestamps is a special case to improve debugging on Darwin: |
635 | // |
636 | // The Darwin linker does not link debug info into the final |
637 | // binary. Instead, it emits entries of type N_OSO in the output |
638 | // binary's symbol table, containing references to the linked-in |
639 | // object files. Using that reference, the debugger can read the |
640 | // debug data directly from the object files. Alternatively, an |
641 | // invocation of 'dsymutil' will link the debug data from the object |
642 | // files into a dSYM bundle, which can be loaded by the debugger, |
643 | // instead of the object files. |
644 | // |
645 | // For an object file, the N_OSO entries contain the absolute path |
646 | // path to the file, and the file's timestamp. For an object |
647 | // included in an archive, the path is formatted like |
648 | // "/absolute/path/to/archive.a(member.o)", and the timestamp is the |
649 | // archive member's timestamp, rather than the archive's timestamp. |
650 | // |
651 | // However, this doesn't always uniquely identify an object within |
652 | // an archive -- an archive file can have multiple entries with the |
653 | // same filename. (This will happen commonly if the original object |
654 | // files started in different directories.) The only way they get |
655 | // distinguished, then, is via the timestamp. But this process is |
656 | // unable to find the correct object file in the archive when there |
657 | // are two files of the same name and timestamp. |
658 | // |
659 | // Additionally, timestamp==0 is treated specially, and causes the |
660 | // timestamp to be ignored as a match criteria. |
661 | // |
662 | // That will "usually" work out okay when creating an archive not in |
663 | // deterministic timestamp mode, because the objects will probably |
664 | // have been created at different timestamps. |
665 | // |
666 | // To ameliorate this problem, in deterministic archive mode (which |
667 | // is the default), on Darwin we will emit a unique non-zero |
668 | // timestamp for each entry with a duplicated name. This is still |
669 | // deterministic: the only thing affecting that timestamp is the |
670 | // order of the files in the resultant archive. |
671 | // |
672 | // See also the functions that handle the lookup: |
673 | // in lldb: ObjectContainerBSDArchive::Archive::FindObject() |
674 | // in llvm/tools/dsymutil: BinaryHolder::GetArchiveMemberBuffers(). |
675 | let unique_timestamps = is_darwin(kind); |
676 | let mut filename_count = HashMap::new(); |
677 | if unique_timestamps { |
678 | for m in new_members { |
679 | *filename_count.entry(&*m.member_name).or_insert(0) += 1; |
680 | } |
681 | for (_name, count) in filename_count.iter_mut() { |
682 | *count = if *count > 1 { 1 } else { 0 }; |
683 | } |
684 | } |
685 | |
686 | // The big archive format needs to know the offset of the previous member |
687 | // header. |
688 | let mut prev_offset = 0; |
689 | let mut next_mem_head_pad_size = 0; |
690 | let mut index = 0; |
691 | for m in new_members { |
692 | let mut header = Vec::new(); |
693 | |
694 | let buf = m.buf.as_ref().as_ref(); |
695 | let data = if thin { &[][..] } else { buf }; |
696 | |
697 | index += 1; |
698 | |
699 | // ld64 expects the members to be 8-byte aligned for 64-bit content and at |
700 | // least 4-byte aligned for 32-bit content. Opt for the larger encoding |
701 | // uniformly. This matches the behaviour with cctools and ensures that ld64 |
702 | // is happy with archives that we generate. |
703 | let member_padding = if is_darwin(kind) { |
704 | offset_to_alignment(u64::try_from(data.len()).unwrap(), 8) |
705 | } else { |
706 | 0 |
707 | }; |
708 | let tail_padding = |
709 | offset_to_alignment(u64::try_from(data.len()).unwrap() + member_padding, 2); |
710 | let padding = &PADDING_DATA[..usize::try_from(member_padding + tail_padding).unwrap()]; |
711 | |
712 | let mtime = if unique_timestamps { |
713 | // Increment timestamp for each file of a given name. |
714 | *filename_count.get_mut(&*m.member_name).unwrap() += 1; |
715 | filename_count[&*m.member_name] - 1 |
716 | } else { |
717 | m.mtime |
718 | }; |
719 | |
720 | let size = u64::try_from(buf.len()).unwrap() + member_padding; |
721 | if size > MAX_MEMBER_SIZE { |
722 | return Err(io::Error::new( |
723 | io::ErrorKind::Other, |
724 | format!("Archive member {} is too big" , m.member_name), |
725 | )); |
726 | } |
727 | |
728 | // In the big archive file format, we need to calculate and include the next |
729 | // member offset and previous member offset in the file member header. |
730 | if is_aix_big_archive(kind) { |
731 | let offset_to_mem_data = |
732 | pos + BIG_AR_MEM_HDR_SIZE + align_to(m.member_name.len().try_into().unwrap(), 2); |
733 | |
734 | if index == 1 { |
735 | next_mem_head_pad_size = align_to_power_of2( |
736 | offset_to_mem_data, |
737 | (m.object_reader.get_xcoff_member_alignment)(buf).into(), |
738 | ) - offset_to_mem_data; |
739 | } |
740 | |
741 | mem_head_pad_size = next_mem_head_pad_size; |
742 | pos += mem_head_pad_size; |
743 | let mut next_offset = pos |
744 | + BIG_AR_MEM_HDR_SIZE |
745 | + align_to(u64::try_from(m.member_name.len()).unwrap(), 2) |
746 | + align_to(size, 2); |
747 | |
748 | // If there is another member file after this, we need to calculate the |
749 | // padding before the header. |
750 | if index != new_members.len() { |
751 | let offset_to_next_mem_data = next_offset |
752 | + BIG_AR_MEM_HDR_SIZE |
753 | + align_to(new_members[index].member_name.len().try_into().unwrap(), 2); |
754 | next_mem_head_pad_size = align_to_power_of2( |
755 | offset_to_next_mem_data, |
756 | (m.object_reader.get_xcoff_member_alignment)( |
757 | new_members[index].buf.as_ref().as_ref(), |
758 | ) |
759 | .into(), |
760 | ) - offset_to_next_mem_data; |
761 | next_offset += next_mem_head_pad_size; |
762 | } |
763 | |
764 | print_big_archive_member_header( |
765 | &mut header, |
766 | &m.member_name, |
767 | mtime, |
768 | m.uid, |
769 | m.gid, |
770 | m.perms, |
771 | size, |
772 | prev_offset, |
773 | next_offset, |
774 | )?; |
775 | prev_offset = pos; |
776 | } else { |
777 | print_member_header( |
778 | &mut header, |
779 | pos, |
780 | string_table, |
781 | &mut member_names, |
782 | kind, |
783 | thin, |
784 | m, |
785 | mtime, |
786 | size, |
787 | )?; |
788 | } |
789 | |
790 | let symbols = write_symbols( |
791 | buf, |
792 | index.try_into().unwrap(), |
793 | sym_names, |
794 | sym_map, |
795 | m.object_reader, |
796 | )?; |
797 | has_object = true; |
798 | |
799 | pos += u64::try_from(header.len() + data.len() + padding.len()).unwrap(); |
800 | ret.push(MemberData { |
801 | symbols, |
802 | header, |
803 | data, |
804 | padding, |
805 | pre_head_pad_size: mem_head_pad_size, |
806 | object_reader: m.object_reader, |
807 | }) |
808 | } |
809 | |
810 | // If there are no symbols, emit an empty symbol table, to satisfy Solaris |
811 | // tools, older versions of which expect a symbol table in a non-empty |
812 | // archive, regardless of whether there are any symbols in it. |
813 | if has_object && sym_names.stream_position()? == 0 && !is_coff_archive(kind) { |
814 | write!(sym_names, " \0\0\0" )?; |
815 | } |
816 | |
817 | Ok(ret) |
818 | } |
819 | |
820 | pub fn write_archive_to_stream<'a, W: Write + Seek>( |
821 | w: &mut W, |
822 | new_members: &'a [NewArchiveMember<'a>], |
823 | mut kind: ArchiveKind, |
824 | thin: bool, |
825 | is_ec: bool, |
826 | ) -> io::Result<()> { |
827 | assert!( |
828 | !thin || !is_bsd_like(kind), |
829 | "Only the gnu format has a thin mode" |
830 | ); |
831 | |
832 | let mut sym_names = Cursor::new(Vec::new()); |
833 | let mut string_table = Cursor::new(Vec::new()); |
834 | let mut sym_map = SymMap::default(); |
835 | |
836 | // COFF symbol map uses 16-bit indexes, so we can't use it if there are too |
837 | // many members. |
838 | if is_coff_archive(kind) && new_members.len() > 0xfffe { |
839 | kind = ArchiveKind::Gnu; |
840 | } |
841 | |
842 | sym_map.use_ec_map = is_ec; |
843 | let data = compute_member_data( |
844 | &mut string_table, |
845 | &mut sym_names, |
846 | kind, |
847 | thin, |
848 | &mut is_coff_archive(kind).then_some(&mut sym_map), |
849 | new_members, |
850 | )?; |
851 | |
852 | let sym_names = sym_names.into_inner(); |
853 | |
854 | let mut string_table_size: u64 = 0; |
855 | let mut string_table_member = None; |
856 | let string_table = string_table.into_inner(); |
857 | if !string_table.is_empty() && !is_aix_big_archive(kind) { |
858 | let string_table_temp = compute_string_table(&string_table); |
859 | string_table_size = (string_table_temp.header.len() |
860 | + string_table_temp.data.len() |
861 | + string_table_temp.padding.len()) |
862 | .try_into() |
863 | .unwrap(); |
864 | string_table_member = Some(string_table_temp); |
865 | } |
866 | |
867 | // We would like to detect if we need to switch to a 64-bit symbol table. |
868 | let mut last_member_end_offset = 0; |
869 | let mut last_member_header_offset = 0; |
870 | let mut num_syms = 0; |
871 | let mut num_syms32: u64 = 0; // Store symbol number of 32-bit member files. |
872 | |
873 | for m in &data { |
874 | // Record the start of the member's offset |
875 | last_member_end_offset += m.pre_head_pad_size; |
876 | last_member_header_offset = last_member_end_offset; |
877 | // Account for the size of each part associated with the member. |
878 | last_member_end_offset += |
879 | u64::try_from(m.header.len() + m.data.len() + m.padding.len()).unwrap(); |
880 | num_syms += u64::try_from(m.symbols.len()).unwrap(); |
881 | |
882 | // AIX big archive files may contain two global symbol tables. The |
883 | // first global symbol table locates 32-bit file members that define global |
884 | // symbols; the second global symbol table does the same for 64-bit file |
885 | // members. As a big archive can have both 32-bit and 64-bit file members, |
886 | // we need to know the number of symbols in each symbol table individually. |
887 | if is_aix_big_archive(kind) && !(m.object_reader.is_64_bit_object_file)(m.data) { |
888 | num_syms32 = num_syms32 |
889 | .checked_add(m.symbols.len().try_into().unwrap()) |
890 | .unwrap(); |
891 | } |
892 | } |
893 | |
894 | let mut maybe_headers_size = None; |
895 | |
896 | // The symbol table is put at the end of the big archive file. The symbol |
897 | // table is at the start of the archive file for other archive formats. |
898 | if !is_64bit_kind(kind) { |
899 | // We assume 32-bit offsets to see if 32-bit symbols are possible or not. |
900 | maybe_headers_size = Some(compute_headers_size( |
901 | kind, |
902 | data.len(), |
903 | string_table_size, |
904 | num_syms, |
905 | sym_names.len().try_into().unwrap(), |
906 | is_coff_archive(kind).then_some(&sym_map), |
907 | )?); |
908 | |
909 | // The SYM64 format is used when an archive's member offsets are larger than |
910 | // 32-bits can hold. The need for this shift in format is detected by |
911 | // writeArchive. To test this we need to generate a file with a member that |
912 | // has an offset larger than 32-bits but this demands a very slow test. To |
913 | // speed the test up we use this environment variable to pretend like the |
914 | // cutoff happens before 32-bits and instead happens at some much smaller |
915 | // value. |
916 | // FIXME allow lowering the threshold for tests |
917 | const SYM64_THRESHOLD: u64 = 1 << 32; |
918 | |
919 | // If LastMemberHeaderOffset isn't going to fit in a 32-bit varible we need |
920 | // to switch to 64-bit. Note that the file can be larger than 4GB as long as |
921 | // the last member starts before the 4GB offset. |
922 | if maybe_headers_size.unwrap() + last_member_header_offset >= SYM64_THRESHOLD { |
923 | if kind == ArchiveKind::Darwin { |
924 | kind = ArchiveKind::Darwin64; |
925 | } else { |
926 | kind = ArchiveKind::Gnu64; |
927 | } |
928 | maybe_headers_size = None; |
929 | } |
930 | } |
931 | |
932 | if thin { |
933 | write!(w, "!<thin> \n" )?; |
934 | } else if is_aix_big_archive(kind) { |
935 | write!(w, "<bigaf> \n" )?; |
936 | } else { |
937 | write!(w, "!<arch> \n" )?; |
938 | } |
939 | |
940 | let headers_size; |
941 | if !is_aix_big_archive(kind) { |
942 | headers_size = if let Some(headers_size) = maybe_headers_size { |
943 | headers_size |
944 | } else { |
945 | compute_headers_size( |
946 | kind, |
947 | data.len(), |
948 | string_table_size, |
949 | num_syms, |
950 | sym_names.len().try_into().unwrap(), |
951 | is_coff_archive(kind).then_some(&sym_map), |
952 | )? |
953 | }; |
954 | write_symbol_table( |
955 | w, |
956 | kind, |
957 | &data, |
958 | &sym_names, |
959 | headers_size, |
960 | num_syms, |
961 | 0, |
962 | 0, |
963 | false, |
964 | )?; |
965 | |
966 | if is_coff_archive(kind) { |
967 | write_symbol_map(w, kind, &data, &sym_map, headers_size)?; |
968 | } |
969 | |
970 | if string_table_size > 0 { |
971 | let string_table_member = string_table_member.unwrap(); |
972 | w.write_all(&string_table_member.header)?; |
973 | w.write_all(string_table_member.data)?; |
974 | w.write_all(string_table_member.padding)?; |
975 | } |
976 | |
977 | if !sym_map.ec_map.is_empty() { |
978 | write_ec_symbols(w, &sym_map)?; |
979 | } |
980 | |
981 | for m in data { |
982 | w.write_all(&m.header)?; |
983 | w.write_all(m.data)?; |
984 | w.write_all(m.padding)?; |
985 | } |
986 | } else { |
987 | headers_size = u64::try_from(std::mem::size_of::<big_archive::FixLenHdr>()).unwrap(); |
988 | last_member_end_offset += headers_size; |
989 | last_member_header_offset += headers_size; |
990 | |
991 | // For the big archive (AIX) format, compute a table of member names and |
992 | // offsets, used in the member table. |
993 | let mut member_table_name_str_tbl_size = 0; |
994 | let mut member_offsets = vec![]; |
995 | let mut member_names = vec![]; |
996 | |
997 | // Loop across object to find offset and names. |
998 | let mut member_end_offset = |
999 | u64::try_from(std::mem::size_of::<big_archive::FixLenHdr>()).unwrap(); |
1000 | for i in 0..new_members.len() { |
1001 | let member = &new_members[i]; |
1002 | member_table_name_str_tbl_size += member.member_name.len() + 1; |
1003 | member_end_offset += data[i].pre_head_pad_size; |
1004 | member_offsets.push(member_end_offset); |
1005 | member_names.push(&member.member_name); |
1006 | // File member name ended with "`\n". The length is included in |
1007 | // BigArMemHdrType. |
1008 | member_end_offset += BIG_AR_MEM_HDR_SIZE |
1009 | + align_to(u64::try_from(data[i].data.len()).unwrap(), 2) |
1010 | + align_to(u64::try_from(member.member_name.len()).unwrap(), 2); |
1011 | } |
1012 | |
1013 | // AIX member table size. |
1014 | let member_table_size = |
1015 | u64::try_from(20 + 20 * member_offsets.len() + member_table_name_str_tbl_size).unwrap(); |
1016 | |
1017 | let mut sym_names32 = Cursor::new(Vec::new()); |
1018 | let mut sym_names64 = Cursor::new(Vec::new()); |
1019 | |
1020 | if num_syms > 0 { |
1021 | // Generate the symbol names for the members. |
1022 | for m in &data { |
1023 | write_symbols( |
1024 | m.data, |
1025 | 0, |
1026 | if (m.object_reader.is_64_bit_object_file)(m.data) { |
1027 | &mut sym_names64 |
1028 | } else { |
1029 | &mut sym_names32 |
1030 | }, |
1031 | &mut None, |
1032 | m.object_reader, |
1033 | )?; |
1034 | } |
1035 | } |
1036 | |
1037 | let member_table_end_offset = |
1038 | last_member_end_offset + align_to(BIG_AR_MEM_HDR_SIZE + member_table_size, 2); |
1039 | |
1040 | // In AIX OS, the 'GlobSymOffset' field in the fixed-length header contains |
1041 | // the offset to the 32-bit global symbol table, and the 'GlobSym64Offset' |
1042 | // contains the offset to the 64-bit global symbol table. |
1043 | let global_symbol_offset = if num_syms32 > 0 { |
1044 | member_table_end_offset |
1045 | } else { |
1046 | 0 |
1047 | }; |
1048 | |
1049 | let mut global_symbol_offset64 = 0; |
1050 | let num_syms64 = num_syms - num_syms32; |
1051 | if num_syms64 > 0 { |
1052 | if global_symbol_offset == 0 { |
1053 | global_symbol_offset64 = member_table_end_offset; |
1054 | } else { |
1055 | // If there is a global symbol table for 32-bit members, |
1056 | // the 64-bit global symbol table is after the 32-bit one. |
1057 | global_symbol_offset64 = global_symbol_offset |
1058 | + BIG_AR_MEM_HDR_SIZE |
1059 | + (num_syms32 + 1) * 8 |
1060 | + align_to(sym_names32.get_ref().len().try_into().unwrap(), 2); |
1061 | } |
1062 | } |
1063 | |
1064 | // Fixed Sized Header. |
1065 | // Offset to member table |
1066 | write!( |
1067 | w, |
1068 | " {:<20}" , |
1069 | if !new_members.is_empty() { |
1070 | last_member_end_offset |
1071 | } else { |
1072 | 0 |
1073 | } |
1074 | )?; |
1075 | // If there are no file members in the archive, there will be no global |
1076 | // symbol table. |
1077 | write!(w, " {:<20}" , global_symbol_offset)?; |
1078 | write!(w, " {:<20}" , global_symbol_offset64)?; |
1079 | // Offset to first archive member |
1080 | write!( |
1081 | w, |
1082 | " {:<20}" , |
1083 | if !new_members.is_empty() { |
1084 | u64::try_from(std::mem::size_of::<big_archive::FixLenHdr>()).unwrap() |
1085 | + data[0].pre_head_pad_size |
1086 | } else { |
1087 | 0 |
1088 | } |
1089 | )?; |
1090 | // Offset to last archive member |
1091 | write!( |
1092 | w, |
1093 | " {:<20}" , |
1094 | if !new_members.is_empty() { |
1095 | last_member_header_offset |
1096 | } else { |
1097 | 0 |
1098 | } |
1099 | )?; |
1100 | // Offset to first member of free list - Not supported yet |
1101 | write!(w, " {:<20}" , 0)?; |
1102 | |
1103 | for m in &data { |
1104 | write!( |
1105 | w, |
1106 | " {nil:\0<pad$}" , |
1107 | nil = "" , |
1108 | pad = usize::try_from(m.pre_head_pad_size).unwrap() |
1109 | )?; |
1110 | w.write_all(&m.header)?; |
1111 | w.write_all(m.data)?; |
1112 | if m.data.len() % 2 != 0 { |
1113 | w.write_all(&[0])?; |
1114 | } |
1115 | } |
1116 | |
1117 | if !new_members.is_empty() { |
1118 | // Member table. |
1119 | print_big_archive_member_header( |
1120 | w, |
1121 | "" , |
1122 | 0, |
1123 | 0, |
1124 | 0, |
1125 | 0, |
1126 | member_table_size, |
1127 | last_member_header_offset, |
1128 | if global_symbol_offset != 0 { |
1129 | global_symbol_offset |
1130 | } else { |
1131 | global_symbol_offset64 |
1132 | }, |
1133 | )?; |
1134 | write!(w, " {:<20}" , member_offsets.len())?; // Number of members |
1135 | for member_offset in member_offsets { |
1136 | write!(w, " {:<20}" , member_offset)?; |
1137 | } |
1138 | for member_name in member_names { |
1139 | w.write_all(member_name.as_bytes())?; |
1140 | w.write_all(&[0])?; |
1141 | } |
1142 | |
1143 | if member_table_name_str_tbl_size % 2 != 0 { |
1144 | // Name table must be tail padded to an even number of |
1145 | // bytes. |
1146 | w.write_all(&[0])?; |
1147 | } |
1148 | |
1149 | // Write global symbol table for 32-bit file members. |
1150 | if global_symbol_offset != 0 { |
1151 | write_symbol_table( |
1152 | w, |
1153 | kind, |
1154 | &data, |
1155 | sym_names32.get_ref(), |
1156 | headers_size, |
1157 | num_syms32, |
1158 | last_member_end_offset, |
1159 | global_symbol_offset64, |
1160 | false, |
1161 | )?; |
1162 | // Add padding between the symbol tables, if needed. |
1163 | if global_symbol_offset64 != 0 && (sym_names32.get_ref().len() % 2) != 0 { |
1164 | w.write_all(&[0])?; |
1165 | } |
1166 | } |
1167 | |
1168 | // Write global symbol table for 64-bit file members. |
1169 | if global_symbol_offset64 != 0 { |
1170 | write_symbol_table( |
1171 | w, |
1172 | kind, |
1173 | &data, |
1174 | sym_names64.get_ref(), |
1175 | headers_size, |
1176 | num_syms64, |
1177 | if global_symbol_offset != 0 { |
1178 | global_symbol_offset |
1179 | } else { |
1180 | last_member_end_offset |
1181 | }, |
1182 | 0, |
1183 | true, |
1184 | )?; |
1185 | } |
1186 | } |
1187 | } |
1188 | |
1189 | w.flush() |
1190 | } |
1191 | |