#include <getopt.h>
#include <mach-o/loader.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

#include <iostream>
#include <optional>
#include <string>
#include <vector>

using namespace std;
| 14 | |
// Print the command-line usage summary to stderr and terminate the process
// with exit status 1.  This function never returns.
[[noreturn]] void print_help(void) {
  fprintf(stderr, "Append an LC_NOTE to a corefile. Usage: \n");
  fprintf(stderr, " -i|--input <corefile>\n");
  fprintf(stderr, " -o|--output <corefile>\n");
  fprintf(stderr, " -n|--name <LC_NOTE name>\n");
  fprintf(
      stderr,
      " -r|--remove-dups remove existing LC_NOTEs with this same name\n");
  fprintf(stderr, " One of:\n");
  fprintf(stderr, " -f|--file <file to embed as LC_NOTE payload>\n");
  fprintf(stderr, " -s|--str <string to embed as LC_NOTE payload>\n");
  exit(1);
}
| 28 | |
| 29 | void parse_args(int argc, char **argv, string &infile, string &outfile, |
| 30 | string ¬e_name, vector<uint8_t> &payload, |
| 31 | bool &remove_dups) { |
| 32 | const char *const short_opts = "i:o:n:f:s:hr" ; |
| 33 | const option long_opts[] = {{.name: "input" , required_argument, .flag: nullptr, .val: 'i'}, |
| 34 | {.name: "output" , required_argument, .flag: nullptr, .val: 'o'}, |
| 35 | {.name: "name" , required_argument, .flag: nullptr, .val: 'n'}, |
| 36 | {.name: "file" , required_argument, .flag: nullptr, .val: 'f'}, |
| 37 | {.name: "str" , required_argument, .flag: nullptr, .val: 's'}, |
| 38 | {.name: "remove-dups" , no_argument, .flag: nullptr, .val: 'r'}, |
| 39 | {.name: "help" , no_argument, .flag: nullptr, .val: 'h'}, |
| 40 | {.name: nullptr, no_argument, .flag: nullptr, .val: 0}}; |
| 41 | optional<string> infile_str, outfile_str, name_str, payload_file_str, |
| 42 | payload_str; |
| 43 | remove_dups = false; |
| 44 | while (true) { |
| 45 | const auto opt = getopt_long(argc: argc, argv: argv, shortopts: short_opts, longopts: long_opts, longind: nullptr); |
| 46 | if (opt == -1) |
| 47 | break; |
| 48 | switch (opt) { |
| 49 | case 'i': |
| 50 | infile_str = optarg; |
| 51 | break; |
| 52 | case 'o': |
| 53 | outfile_str = optarg; |
| 54 | break; |
| 55 | case 'n': |
| 56 | name_str = optarg; |
| 57 | break; |
| 58 | case 'f': |
| 59 | payload_file_str = optarg; |
| 60 | break; |
| 61 | case 's': |
| 62 | payload_str = optarg; |
| 63 | break; |
| 64 | case 'r': |
| 65 | remove_dups = true; |
| 66 | break; |
| 67 | case 'h': |
| 68 | print_help(); |
| 69 | } |
| 70 | } |
| 71 | |
| 72 | if (!infile_str || !outfile_str || !name_str || |
| 73 | (!payload_file_str && !payload_str)) |
| 74 | print_help(); |
| 75 | |
| 76 | infile = *infile_str; |
| 77 | outfile = *outfile_str; |
| 78 | note_name = *name_str; |
| 79 | if (payload_str) { |
| 80 | payload.resize(payload_str->size(), 0); |
| 81 | memcpy(payload.data(), payload_str->c_str(), payload_str->size()); |
| 82 | } else { |
| 83 | struct stat sb; |
| 84 | if (stat(file: payload_file_str->c_str(), buf: &sb)) { |
| 85 | fprintf(stderr, format: "File '%s' does not exist.\n" , payload_file_str->c_str()); |
| 86 | exit(status: 1); |
| 87 | } |
| 88 | payload.resize(sb.st_size, 0); |
| 89 | FILE *f = fopen(filename: payload_file_str->c_str(), modes: "r" ); |
| 90 | fread(payload.data(), 1, sb.st_size, f); |
| 91 | fclose(stream: f); |
| 92 | } |
| 93 | } |
| 94 | |
// Fixed-size header found at the start of an "all image infos" LC_NOTE
// payload.  It locates the array of per-image entries inside the corefile.
struct all_image_infos_header {
  uint32_t version;         // currently 1
  uint32_t imgcount;        // number of binary images
  uint64_t entries_fileoff; // file offset in the corefile of where the array
                            // of struct entry's begin.
  uint32_t entry_size;      // size of 'struct entry'.
  uint32_t unused;          // set to 0
};
| 103 | |
// One element of the entry array referenced by an "all image infos" header.
// Describes a single binary image recorded in the corefile.  The struct is
// read from and written to the file as raw bytes, so its layout is fixed.
struct image_entry {
  uint64_t filepath_offset;  // corefile offset of the c-string filepath,
                             // if available, else this should be set
                             // to UINT64_MAX.
  uint8_t uuid[16];          // same layout as uuid_t.  should be set to all
                             // zeroes if uuid is unknown.
  uint64_t load_address;     // virtual addr of mach-o header, UINT64_MAX if
                             // unknown.
  uint64_t seg_addrs_offset; // corefile offset to the array of struct
                             // segment_vmaddr's, UINT64_MAX if none.
  uint32_t segment_count;    // The number of segments for this binary, 0 if
                             // none.
  uint32_t executing;        // Set to 0 if executing status is unknown by
                             // corefile creator.
                             // Set to 1 if this binary was executing on any
                             // thread, so it can be force-loaded by the
                             // corefile reader.
                             // Set to 2 if this binary was not executing on
                             // any thread.
};

static_assert(sizeof(image_entry) == 48,
              "image_entry is copied to/from the corefile byte-for-byte");
| 122 | |
| 123 | int count_lc_notes_with_name(FILE *in, std::string name) { |
| 124 | fseeko(stream: in, off: 0, SEEK_SET); |
| 125 | |
| 126 | uint8_t magic[4]; |
| 127 | if (fread(magic, 1, 4, in) != 4) { |
| 128 | printf(format: "Failed to read magic number\n" ); |
| 129 | return 0; |
| 130 | } |
| 131 | uint8_t magic_32_le[] = {0xce, 0xfa, 0xed, 0xfe}; |
| 132 | uint8_t magic_64_le[] = {0xcf, 0xfa, 0xed, 0xfe}; |
| 133 | |
| 134 | if (memcmp(magic, magic_32_le, 4) != 0 && |
| 135 | memcmp(magic, magic_64_le, 4) != 0) { |
| 136 | return 0; |
| 137 | } |
| 138 | |
| 139 | fseeko(stream: in, off: 0, SEEK_SET); |
| 140 | |
| 141 | int number_of_load_cmds = 0; |
| 142 | size_t = 0; |
| 143 | if (memcmp(magic, magic_64_le, 4) == 0) { |
| 144 | struct mh; |
| 145 | size_of_mach_header = sizeof(mh); |
| 146 | if (fread(&mh, sizeof(mh), 1, in) != 1) { |
| 147 | fprintf(stderr, format: "unable to read mach header\n" ); |
| 148 | return 0; |
| 149 | } |
| 150 | number_of_load_cmds = mh.ncmds; |
| 151 | } else { |
| 152 | struct mh; |
| 153 | size_of_mach_header = sizeof(mh); |
| 154 | if (fread(&mh, sizeof(mh), 1, in) != 1) { |
| 155 | fprintf(stderr, format: "unable to read mach header\n" ); |
| 156 | return 0; |
| 157 | } |
| 158 | number_of_load_cmds = mh.ncmds; |
| 159 | } |
| 160 | |
| 161 | int notes_seen = 0; |
| 162 | fseeko(stream: in, off: size_of_mach_header, SEEK_SET); |
| 163 | for (int i = 0; i < number_of_load_cmds; i++) { |
| 164 | off_t cmd_start = ftello(stream: in); |
| 165 | uint32_t cmd, cmdsize; |
| 166 | fread(&cmd, sizeof(uint32_t), 1, in); |
| 167 | fread(&cmdsize, sizeof(uint32_t), 1, in); |
| 168 | |
| 169 | fseeko(stream: in, off: cmd_start, SEEK_SET); |
| 170 | off_t next_cmd = cmd_start + cmdsize; |
| 171 | if (cmd == LC_NOTE) { |
| 172 | struct note_command note; |
| 173 | fread(¬e, sizeof(note), 1, in); |
| 174 | if (strncmp(name.c_str(), note.data_owner, 16) == 0) |
| 175 | notes_seen++; |
| 176 | } |
| 177 | fseeko(stream: in, off: next_cmd, SEEK_SET); |
| 178 | } |
| 179 | return notes_seen; |
| 180 | } |
| 181 | |
| 182 | void copy_and_add_note(FILE *in, FILE *out, std::string lc_note_name, |
| 183 | vector<uint8_t> payload_data, bool remove_dups) { |
| 184 | int number_of_load_cmds = 0; |
| 185 | off_t = ftello(stream: in); |
| 186 | |
| 187 | int notes_to_remove = 0; |
| 188 | if (remove_dups) |
| 189 | notes_to_remove = count_lc_notes_with_name(in, name: lc_note_name); |
| 190 | fseeko(stream: in, off: header_start, SEEK_SET); |
| 191 | |
| 192 | uint8_t magic[4]; |
| 193 | if (fread(magic, 1, 4, in) != 4) { |
| 194 | printf(format: "Failed to read magic number\n" ); |
| 195 | return; |
| 196 | } |
| 197 | uint8_t magic_32_le[] = {0xce, 0xfa, 0xed, 0xfe}; |
| 198 | uint8_t magic_64_le[] = {0xcf, 0xfa, 0xed, 0xfe}; |
| 199 | |
| 200 | if (memcmp(magic, magic_32_le, 4) != 0 && |
| 201 | memcmp(magic, magic_64_le, 4) != 0) { |
| 202 | return; |
| 203 | } |
| 204 | |
| 205 | fseeko(stream: in, off: header_start, SEEK_SET); |
| 206 | |
| 207 | off_t end_of_infine_loadcmds; |
| 208 | size_t = 0; |
| 209 | if (memcmp(magic, magic_64_le, 4) == 0) { |
| 210 | struct mh; |
| 211 | size_of_mach_header = sizeof(mh); |
| 212 | if (fread(&mh, sizeof(mh), 1, in) != 1) { |
| 213 | fprintf(stderr, format: "unable to read mach header\n" ); |
| 214 | return; |
| 215 | } |
| 216 | number_of_load_cmds = mh.ncmds; |
| 217 | end_of_infine_loadcmds = sizeof(mh) + mh.sizeofcmds; |
| 218 | |
| 219 | mh.ncmds += 1; |
| 220 | mh.ncmds -= notes_to_remove; |
| 221 | mh.sizeofcmds += sizeof(struct note_command); |
| 222 | mh.sizeofcmds -= notes_to_remove * sizeof(struct note_command); |
| 223 | fseeko(stream: out, off: header_start, SEEK_SET); |
| 224 | fwrite(&mh, sizeof(mh), 1, out); |
| 225 | } else { |
| 226 | struct mh; |
| 227 | size_of_mach_header = sizeof(mh); |
| 228 | if (fread(&mh, sizeof(mh), 1, in) != 1) { |
| 229 | fprintf(stderr, format: "unable to read mach header\n" ); |
| 230 | return; |
| 231 | } |
| 232 | number_of_load_cmds = mh.ncmds; |
| 233 | end_of_infine_loadcmds = sizeof(mh) + mh.sizeofcmds; |
| 234 | |
| 235 | mh.ncmds += 1; |
| 236 | mh.ncmds -= notes_to_remove; |
| 237 | mh.sizeofcmds += sizeof(struct note_command); |
| 238 | mh.sizeofcmds -= notes_to_remove * sizeof(struct note_command); |
| 239 | fseeko(stream: out, off: header_start, SEEK_SET); |
| 240 | fwrite(&mh, sizeof(mh), 1, out); |
| 241 | } |
| 242 | |
| 243 | off_t start_of_infile_load_cmds = ftello(stream: in); |
| 244 | fseek(stream: in, off: 0, SEEK_END); |
| 245 | off_t infile_size = ftello(stream: in); |
| 246 | |
| 247 | // LC_SEGMENT may be aligned to 4k boundaries, let's maintain |
| 248 | // that alignment by putting 4096 minus the size of the added |
| 249 | // LC_NOTE load command after the output file's load commands. |
| 250 | off_t end_of_outfile_loadcmds = |
| 251 | end_of_infine_loadcmds - (notes_to_remove * sizeof(struct note_command)) + |
| 252 | 4096 - sizeof(struct note_command); |
| 253 | off_t slide = end_of_outfile_loadcmds - end_of_infine_loadcmds; |
| 254 | |
| 255 | off_t all_image_infos_infile_offset = 0; |
| 256 | |
| 257 | fseek(stream: in, off: start_of_infile_load_cmds, SEEK_SET); |
| 258 | fseek(stream: out, off: start_of_infile_load_cmds, SEEK_SET); |
| 259 | // Copy all the load commands from IN to OUT, updating any file offsets by |
| 260 | // SLIDE. |
| 261 | for (int cmd_num = 0; cmd_num < number_of_load_cmds; cmd_num++) { |
| 262 | off_t cmd_start = ftello(stream: in); |
| 263 | uint32_t cmd, cmdsize; |
| 264 | fread(&cmd, sizeof(uint32_t), 1, in); |
| 265 | fread(&cmdsize, sizeof(uint32_t), 1, in); |
| 266 | |
| 267 | fseeko(stream: in, off: cmd_start, SEEK_SET); |
| 268 | off_t next_cmd = cmd_start + cmdsize; |
| 269 | |
| 270 | switch (cmd) { |
| 271 | case LC_SEGMENT: { |
| 272 | struct segment_command segcmd; |
| 273 | fread(&segcmd, sizeof(segcmd), 1, in); |
| 274 | segcmd.fileoff += slide; |
| 275 | fwrite(&segcmd, cmdsize, 1, out); |
| 276 | } break; |
| 277 | case LC_SEGMENT_64: { |
| 278 | struct segment_command_64 segcmd; |
| 279 | fread(&segcmd, sizeof(segcmd), 1, in); |
| 280 | segcmd.fileoff += slide; |
| 281 | fwrite(&segcmd, cmdsize, 1, out); |
| 282 | } break; |
| 283 | case LC_NOTE: { |
| 284 | struct note_command notecmd; |
| 285 | fread(¬ecmd, sizeof(notecmd), 1, in); |
| 286 | if ((strncmp(lc_note_name.c_str(), notecmd.data_owner, 16) == 0) && |
| 287 | remove_dups) { |
| 288 | fseeko(stream: in, off: next_cmd, SEEK_SET); |
| 289 | continue; |
| 290 | } |
| 291 | if (strncmp("all image infos" , notecmd.data_owner, 16) == 0) |
| 292 | all_image_infos_infile_offset = notecmd.offset; |
| 293 | notecmd.offset += slide; |
| 294 | fwrite(¬ecmd, cmdsize, 1, out); |
| 295 | } break; |
| 296 | default: { |
| 297 | vector<uint8_t> buf(cmdsize); |
| 298 | fread(buf.data(), cmdsize, 1, in); |
| 299 | fwrite(buf.data(), cmdsize, 1, out); |
| 300 | } |
| 301 | } |
| 302 | fseeko(stream: in, off: next_cmd, SEEK_SET); |
| 303 | } |
| 304 | |
| 305 | // Now add our additional LC_NOTE load command. |
| 306 | struct note_command note; |
| 307 | note.cmd = LC_NOTE; |
| 308 | note.cmdsize = sizeof(struct note_command); |
| 309 | memset(¬e.data_owner, 0, 16); |
| 310 | // data_owner may not be nul terminated if all 16 characters |
| 311 | // are used, intentionally using strncpy here. |
| 312 | strncpy(note.data_owner, lc_note_name.c_str(), 16); |
| 313 | note.offset = infile_size + slide; |
| 314 | note.size = payload_data.size(); |
| 315 | fwrite(¬e, sizeof(struct note_command), 1, out); |
| 316 | |
| 317 | fseeko(stream: in, off: end_of_infine_loadcmds, SEEK_SET); |
| 318 | fseeko(stream: out, off: end_of_outfile_loadcmds, SEEK_SET); |
| 319 | |
| 320 | // Copy the rest of the corefile contents |
| 321 | vector<uint8_t> data_buf(1024 * 1024); |
| 322 | while (!feof(stream: in)) { |
| 323 | size_t read_bytes = fread(data_buf.data(), 1, data_buf.size(), in); |
| 324 | if (read_bytes > 0) { |
| 325 | fwrite(data_buf.data(), read_bytes, 1, out); |
| 326 | } else { |
| 327 | break; |
| 328 | } |
| 329 | } |
| 330 | |
| 331 | fwrite(payload_data.data(), payload_data.size(), 1, out); |
| 332 | |
| 333 | // The "all image infos" LC_NOTE payload has file offsets hardcoded |
| 334 | // in it, unfortunately. We've shifted the contents of the corefile |
| 335 | // and these offsets need to be updated in the ouput file. |
| 336 | // Re-copy them into the outfile with corrected file offsets. |
| 337 | off_t infile_image_entry_base = 0; |
| 338 | if (all_image_infos_infile_offset != 0) { |
| 339 | off_t all_image_infos_outfile_offset = |
| 340 | all_image_infos_infile_offset + slide; |
| 341 | fseeko(stream: in, off: all_image_infos_infile_offset, SEEK_SET); |
| 342 | struct all_image_infos_header ; |
| 343 | fread(ptr: &header, size: sizeof(header), n: 1, stream: in); |
| 344 | infile_image_entry_base = header.entries_fileoff; |
| 345 | header.entries_fileoff += slide; |
| 346 | fseeko(stream: out, off: all_image_infos_outfile_offset, SEEK_SET); |
| 347 | fwrite(ptr: &header, size: sizeof(header), n: 1, s: out); |
| 348 | |
| 349 | for (int i = 0; i < header.imgcount; i++) { |
| 350 | off_t infile_entries_fileoff = header.entries_fileoff - slide; |
| 351 | off_t outfile_entries_fileoff = header.entries_fileoff; |
| 352 | |
| 353 | struct image_entry ent; |
| 354 | fseeko(in, infile_entries_fileoff + (header.entry_size * i), SEEK_SET); |
| 355 | fread(ptr: &ent, size: sizeof(ent), n: 1, stream: in); |
| 356 | ent.filepath_offset += slide; |
| 357 | ent.seg_addrs_offset += slide; |
| 358 | fseeko(out, outfile_entries_fileoff + (header.entry_size * i), SEEK_SET); |
| 359 | fwrite(ptr: &ent, size: sizeof(ent), n: 1, s: out); |
| 360 | } |
| 361 | } |
| 362 | } |
| 363 | |
| 364 | int main(int argc, char **argv) { |
| 365 | string infile, outfile, name; |
| 366 | vector<uint8_t> payload; |
| 367 | bool remove_dups; |
| 368 | parse_args(argc, argv, infile, outfile, name, payload, remove_dups); |
| 369 | |
| 370 | FILE *in = fopen(filename: infile.c_str(), modes: "r" ); |
| 371 | if (!in) { |
| 372 | fprintf(stderr, format: "Unable to open %s for reading\n" , infile.c_str()); |
| 373 | exit(status: 1); |
| 374 | } |
| 375 | FILE *out = fopen(filename: outfile.c_str(), modes: "w" ); |
| 376 | if (!out) { |
| 377 | fprintf(stderr, format: "Unable to open %s for reading\n" , outfile.c_str()); |
| 378 | exit(status: 1); |
| 379 | } |
| 380 | |
| 381 | copy_and_add_note(in, out, name, payload, remove_dups); |
| 382 | |
| 383 | fclose(stream: in); |
| 384 | fclose(stream: out); |
| 385 | } |
| 386 | |