| 1 | //! Parse the Linux vDSO. |
| 2 | //! |
| 3 | //! The following code is transliterated from |
| 4 | //! tools/testing/selftests/vDSO/parse_vdso.c in Linux 6.12, which is licensed |
| 5 | //! with Creative Commons Zero License, version 1.0, |
| 6 | //! available at <https://creativecommons.org/publicdomain/zero/1.0/legalcode> |
| 7 | //! |
| 8 | //! # Safety |
| 9 | //! |
| 10 | //! Parsing the vDSO involves a lot of raw pointer manipulation. This |
| 11 | //! implementation follows Linux's reference implementation, and adds several |
| 12 | //! additional safety checks. |
| 13 | #![allow (unsafe_code)] |
| 14 | |
| 15 | use super::c; |
| 16 | use crate::ffi::CStr; |
| 17 | use crate::utils::check_raw_pointer; |
| 18 | use core::ffi::c_void; |
| 19 | use core::mem::size_of; |
| 20 | use core::ptr::{null, null_mut}; |
| 21 | use linux_raw_sys::elf::*; |
| 22 | |
/// The integer type of one word in the `DT_HASH` table (`nbucket`, the bucket
/// array, and the chain array are all read through this type).
///
/// This is 32 bits wide on every architecture except s390x.
#[cfg(not(target_arch = "s390x"))]
type ElfHashEntry = u32;
/// The integer type of one word in the `DT_HASH` table (`nbucket`, the bucket
/// array, and the chain array are all read through this type).
///
/// On s390x the table words are 64 bits wide.
#[cfg(target_arch = "s390x")]
type ElfHashEntry = u64;
| 27 | |
| 28 | pub(super) struct Vdso { |
| 29 | // Load information |
| 30 | load_addr: *const Elf_Ehdr, |
| 31 | load_end: *const c_void, // the end of the `PT_LOAD` segment |
| 32 | pv_offset: usize, // recorded paddr - recorded vaddr |
| 33 | |
| 34 | // Symbol table |
| 35 | symtab: *const Elf_Sym, |
| 36 | symstrings: *const u8, |
| 37 | bucket: *const ElfHashEntry, |
| 38 | chain: *const ElfHashEntry, |
| 39 | nbucket: ElfHashEntry, |
| 40 | //nchain: ElfHashEntry, |
| 41 | |
| 42 | // Version table |
| 43 | versym: *const u16, |
| 44 | verdef: *const Elf_Verdef, |
| 45 | } |
| 46 | |
/// Compute the classic ELF (`DT_HASH`) hash of `name`.
///
/// Straight from the ELF specification...and then tweaked slightly, in order
/// to avoid a few clang warnings. (And then translated to Rust.)
///
/// The bytes of `name`, not including the terminating NUL, are folded into a
/// 32-bit accumulator four bits at a time. After each byte, the top nibble is
/// XORed back in 24 bits lower and then cleared, so the returned value never
/// has any of its top four bits set.
fn elf_hash(name: &CStr) -> u32 {
    name.to_bytes().iter().fold(0_u32, |acc, &byte| {
        // `acc` has its top nibble clear, so the shift loses nothing; the
        // addition is wrapping to mirror C's unsigned arithmetic.
        let shifted = (acc << 4).wrapping_add(u32::from(byte));
        let top = shifted & 0xf000_0000;
        // When `top` is zero both operations are no-ops, so no branch is
        // needed.
        (shifted ^ (top >> 24)) & !top
    })
}
| 62 | |
| 63 | /// Create a `Vdso` value by parsing the vDSO at the `sysinfo_ehdr` address. |
| 64 | fn init_from_sysinfo_ehdr() -> Option<Vdso> { |
| 65 | // SAFETY: The auxv initialization code does extensive checks to ensure |
| 66 | // that the value we get really is an `AT_SYSINFO_EHDR` value from the |
| 67 | // kernel. |
| 68 | unsafe { |
| 69 | let hdr = super::param::auxv::sysinfo_ehdr(); |
| 70 | |
| 71 | // If the platform doesn't provide a `AT_SYSINFO_EHDR`, we can't locate |
| 72 | // the vDSO. |
| 73 | if hdr.is_null() { |
| 74 | return None; |
| 75 | } |
| 76 | |
| 77 | let mut vdso = Vdso { |
| 78 | load_addr: hdr, |
| 79 | load_end: hdr.cast(), |
| 80 | pv_offset: 0, |
| 81 | symtab: null(), |
| 82 | symstrings: null(), |
| 83 | bucket: null(), |
| 84 | chain: null(), |
| 85 | nbucket: 0, |
| 86 | //nchain: 0, |
| 87 | versym: null(), |
| 88 | verdef: null(), |
| 89 | }; |
| 90 | |
| 91 | let hdr = &*hdr; |
| 92 | let pt = check_raw_pointer::<Elf_Phdr>(vdso.base_plus(hdr.e_phoff)? as *mut _)?.as_ptr(); |
| 93 | let mut dyn_: *const Elf_Dyn = null(); |
| 94 | let mut num_dyn = 0; |
| 95 | |
| 96 | // We need two things from the segment table: the load offset |
| 97 | // and the dynamic table. |
| 98 | let mut found_vaddr = false; |
| 99 | for i in 0..hdr.e_phnum { |
| 100 | let phdr = &*pt.add(i as usize); |
| 101 | if phdr.p_type == PT_LOAD && !found_vaddr { |
| 102 | // The segment should be readable and executable, because it |
| 103 | // contains the symbol table and the function bodies. |
| 104 | if phdr.p_flags & (PF_R | PF_X) != (PF_R | PF_X) { |
| 105 | return None; |
| 106 | } |
| 107 | found_vaddr = true; |
| 108 | vdso.load_end = vdso.base_plus(phdr.p_offset.checked_add(phdr.p_memsz)?)?; |
| 109 | vdso.pv_offset = phdr.p_offset.wrapping_sub(phdr.p_vaddr); |
| 110 | } else if phdr.p_type == PT_DYNAMIC { |
| 111 | // If `p_offset` is zero, it's more likely that we're looking |
| 112 | // at memory that has been zeroed than that the kernel has |
| 113 | // somehow aliased the `Ehdr` and the `Elf_Dyn` array. |
| 114 | if phdr.p_offset < size_of::<Elf_Ehdr>() { |
| 115 | return None; |
| 116 | } |
| 117 | |
| 118 | dyn_ = check_raw_pointer::<Elf_Dyn>(vdso.base_plus(phdr.p_offset)? as *mut _)? |
| 119 | .as_ptr(); |
| 120 | num_dyn = phdr.p_memsz / size_of::<Elf_Dyn>(); |
| 121 | } else if phdr.p_type == PT_INTERP || phdr.p_type == PT_GNU_RELRO { |
| 122 | // Don't trust any ELF image that has an “interpreter” or |
| 123 | // that uses RELRO, which is likely to be a user ELF image |
| 124 | // rather and not the kernel vDSO. |
| 125 | return None; |
| 126 | } |
| 127 | } |
| 128 | |
| 129 | if !found_vaddr || dyn_.is_null() { |
| 130 | return None; // Failed |
| 131 | } |
| 132 | |
| 133 | // Fish out the useful bits of the dynamic table. |
| 134 | let mut hash: *const ElfHashEntry = null(); |
| 135 | vdso.symstrings = null(); |
| 136 | vdso.symtab = null(); |
| 137 | vdso.versym = null(); |
| 138 | vdso.verdef = null(); |
| 139 | let mut i = 0; |
| 140 | loop { |
| 141 | if i == num_dyn { |
| 142 | return None; |
| 143 | } |
| 144 | let d = &*dyn_.add(i); |
| 145 | match d.d_tag { |
| 146 | DT_STRTAB => { |
| 147 | vdso.symstrings = |
| 148 | check_raw_pointer::<u8>(vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _)? |
| 149 | .as_ptr(); |
| 150 | } |
| 151 | DT_SYMTAB => { |
| 152 | vdso.symtab = |
| 153 | check_raw_pointer::<Elf_Sym>(vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _)? |
| 154 | .as_ptr(); |
| 155 | } |
| 156 | DT_HASH => { |
| 157 | hash = check_raw_pointer::<ElfHashEntry>( |
| 158 | vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _ |
| 159 | )? |
| 160 | .as_ptr(); |
| 161 | } |
| 162 | DT_VERSYM => { |
| 163 | vdso.versym = |
| 164 | check_raw_pointer::<u16>(vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _)? |
| 165 | .as_ptr(); |
| 166 | } |
| 167 | DT_VERDEF => { |
| 168 | vdso.verdef = check_raw_pointer::<Elf_Verdef>( |
| 169 | vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _, |
| 170 | )? |
| 171 | .as_ptr(); |
| 172 | } |
| 173 | DT_SYMENT => { |
| 174 | if d.d_un.d_ptr != size_of::<Elf_Sym>() { |
| 175 | return None; // Failed |
| 176 | } |
| 177 | } |
| 178 | DT_NULL => break, |
| 179 | _ => {} |
| 180 | } |
| 181 | i = i.checked_add(1)?; |
| 182 | } |
| 183 | // `check_raw_pointer` will have checked these pointers for null, |
| 184 | // however they could still be null if the expected dynamic table |
| 185 | // entries are absent. |
| 186 | if vdso.symstrings.is_null() || vdso.symtab.is_null() || hash.is_null() { |
| 187 | return None; // Failed |
| 188 | } |
| 189 | |
| 190 | if vdso.verdef.is_null() { |
| 191 | vdso.versym = null(); |
| 192 | } |
| 193 | |
| 194 | // Parse the hash table header. |
| 195 | vdso.nbucket = *hash.add(0); |
| 196 | //vdso.nchain = *hash.add(1); |
| 197 | vdso.bucket = hash.add(2); |
| 198 | vdso.chain = hash.add(vdso.nbucket as usize + 2); |
| 199 | |
| 200 | // That's all we need. |
| 201 | Some(vdso) |
| 202 | } |
| 203 | } |
| 204 | |
| 205 | impl Vdso { |
| 206 | /// Parse the vDSO. |
| 207 | /// |
| 208 | /// Returns `None` if the vDSO can't be located or if it doesn't conform to |
| 209 | /// our expectations. |
| 210 | #[inline ] |
| 211 | pub(super) fn new() -> Option<Self> { |
| 212 | init_from_sysinfo_ehdr() |
| 213 | } |
| 214 | |
| 215 | /// Check the version for a symbol. |
| 216 | /// |
| 217 | /// # Safety |
| 218 | /// |
| 219 | /// The raw pointers inside `self` must be valid. |
| 220 | unsafe fn match_version(&self, mut ver: u16, name: &CStr, hash: u32) -> bool { |
| 221 | // This is a helper function to check if the version indexed by |
| 222 | // ver matches name (which hashes to hash). |
| 223 | // |
| 224 | // The version definition table is a mess, and I don't know how |
| 225 | // to do this in better than linear time without allocating memory |
| 226 | // to build an index. I also don't know why the table has |
| 227 | // variable size entries in the first place. |
| 228 | // |
| 229 | // For added fun, I can't find a comprehensible specification of how |
| 230 | // to parse all the weird flags in the table. |
| 231 | // |
| 232 | // So I just parse the whole table every time. |
| 233 | |
| 234 | // First step: find the version definition |
| 235 | ver &= 0x7fff; // Apparently bit 15 means "hidden" |
| 236 | let mut def = self.verdef; |
| 237 | loop { |
| 238 | if (*def).vd_version != VER_DEF_CURRENT { |
| 239 | return false; // Failed |
| 240 | } |
| 241 | |
| 242 | if ((*def).vd_flags & VER_FLG_BASE) == 0 && ((*def).vd_ndx & 0x7fff) == ver { |
| 243 | break; |
| 244 | } |
| 245 | |
| 246 | if (*def).vd_next == 0 { |
| 247 | return false; // No definition. |
| 248 | } |
| 249 | |
| 250 | def = def |
| 251 | .cast::<u8>() |
| 252 | .add((*def).vd_next as usize) |
| 253 | .cast::<Elf_Verdef>(); |
| 254 | } |
| 255 | |
| 256 | // Now figure out whether it matches. |
| 257 | let aux = &*(def.cast::<u8>()) |
| 258 | .add((*def).vd_aux as usize) |
| 259 | .cast::<Elf_Verdaux>(); |
| 260 | (*def).vd_hash == hash |
| 261 | && (name == CStr::from_ptr(self.symstrings.add(aux.vda_name as usize).cast())) |
| 262 | } |
| 263 | |
| 264 | /// Look up a symbol in the vDSO. |
| 265 | pub(super) fn sym(&self, version: &CStr, name: &CStr) -> *mut c::c_void { |
| 266 | let ver_hash = elf_hash(version); |
| 267 | let name_hash = elf_hash(name); |
| 268 | |
| 269 | // SAFETY: The pointers in `self` must be valid. |
| 270 | unsafe { |
| 271 | let mut chain = *self |
| 272 | .bucket |
| 273 | .add((ElfHashEntry::from(name_hash) % self.nbucket) as usize); |
| 274 | |
| 275 | while chain != ElfHashEntry::from(STN_UNDEF) { |
| 276 | let sym = &*self.symtab.add(chain as usize); |
| 277 | |
| 278 | // Check for a defined global or weak function w/ right name. |
| 279 | // |
| 280 | // Accept `STT_NOTYPE` in addition to `STT_FUNC` for the symbol |
| 281 | // type, for compatibility with some versions of Linux on |
| 282 | // PowerPC64. See [this commit] in Linux for more background. |
| 283 | // |
| 284 | // [this commit]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/tools/testing/selftests/vDSO/parse_vdso.c?id=0161bd38c24312853ed5ae9a425a1c41c4ac674a |
| 285 | if (ELF_ST_TYPE(sym.st_info) != STT_FUNC && |
| 286 | ELF_ST_TYPE(sym.st_info) != STT_NOTYPE) |
| 287 | || (ELF_ST_BIND(sym.st_info) != STB_GLOBAL |
| 288 | && ELF_ST_BIND(sym.st_info) != STB_WEAK) |
| 289 | || sym.st_shndx == SHN_UNDEF |
| 290 | || sym.st_shndx == SHN_ABS |
| 291 | || ELF_ST_VISIBILITY(sym.st_other) != STV_DEFAULT |
| 292 | || (name != CStr::from_ptr(self.symstrings.add(sym.st_name as usize).cast())) |
| 293 | // Check symbol version. |
| 294 | || (!self.versym.is_null() |
| 295 | && !self.match_version(*self.versym.add(chain as usize), version, ver_hash)) |
| 296 | { |
| 297 | chain = *self.chain.add(chain as usize); |
| 298 | continue; |
| 299 | } |
| 300 | |
| 301 | let sum = self.addr_from_elf(sym.st_value).unwrap(); |
| 302 | assert!( |
| 303 | sum as usize >= self.load_addr as usize |
| 304 | && sum as usize <= self.load_end as usize |
| 305 | ); |
| 306 | return sum as *mut c::c_void; |
| 307 | } |
| 308 | } |
| 309 | |
| 310 | null_mut() |
| 311 | } |
| 312 | |
| 313 | /// Add the given address to the vDSO base address. |
| 314 | unsafe fn base_plus(&self, offset: usize) -> Option<*const c_void> { |
| 315 | // Check for overflow. |
| 316 | let _ = (self.load_addr as usize).checked_add(offset)?; |
| 317 | // Add the offset to the base. |
| 318 | Some(self.load_addr.cast::<u8>().add(offset).cast()) |
| 319 | } |
| 320 | |
| 321 | /// Translate an ELF-address-space address into a usable virtual address. |
| 322 | unsafe fn addr_from_elf(&self, elf_addr: usize) -> Option<*const c_void> { |
| 323 | self.base_plus(elf_addr.wrapping_add(self.pv_offset)) |
| 324 | } |
| 325 | } |
| 326 | |
/// Parse the vDSO of the running process and check that the clock-gettime
/// symbol for the current architecture can be resolved.
#[cfg(linux_raw)]
#[test]
#[ignore] // Until rustix is updated to the new vDSO format.
fn test_vdso() {
    let vdso = Vdso::new().unwrap();
    assert!(!vdso.symtab.is_null());
    assert!(!vdso.symstrings.is_null());

    // Select the (version, symbol) pair to look up on this architecture.
    #[cfg(any(
        target_arch = "x86_64",
        target_arch = "mips64",
        target_arch = "mips64r6"
    ))]
    let (version, symbol) = (cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime"));
    #[cfg(any(
        target_arch = "arm",
        target_arch = "x86",
        target_arch = "mips",
        target_arch = "mips32r6"
    ))]
    let (version, symbol) = (cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
    #[cfg(target_arch = "aarch64")]
    let (version, symbol) = (cstr!("LINUX_2.6.39"), cstr!("__kernel_clock_gettime"));
    #[cfg(target_arch = "riscv64")]
    let (version, symbol) = (cstr!("LINUX_4.15"), cstr!("__vdso_clock_gettime"));
    #[cfg(target_arch = "powerpc64")]
    let (version, symbol) = (cstr!("LINUX_2.6.15"), cstr!("__kernel_clock_gettime"));
    #[cfg(target_arch = "s390x")]
    let (version, symbol) = (cstr!("LINUX_2.6.29"), cstr!("__kernel_clock_gettime"));

    let ptr = vdso.sym(version, symbol);
    assert!(!ptr.is_null());
}
| 356 | |