David LeGare | 82e2b17 | 2022-03-01 18:53:05 +0000 | [diff] [blame] | 1 | // SPDX-License-Identifier: Apache-2.0 |
Chih-Hung Hsieh | fab4380 | 2020-04-07 14:24:01 -0700 | [diff] [blame] | 2 | |
| 3 | use std::env; |
| 4 | use std::fs::File; |
| 5 | use std::io::{self, Error, ErrorKind, Read, Seek, SeekFrom}; |
| 6 | use std::path::{Path, PathBuf}; |
| 7 | |
| 8 | use super::common; |
| 9 | |
David LeGare | 82e2b17 | 2022-03-01 18:53:05 +0000 | [diff] [blame] | 10 | //================================================ |
| 11 | // Validation |
| 12 | //================================================ |
| 13 | |
/// Extracts the ELF class from the ELF header in a shared library.
///
/// Reads the first five bytes of the file at `path`: the four-byte ELF magic
/// number followed by the class byte, which is returned as-is.
///
/// # Errors
///
/// Returns any I/O error raised while opening or reading the file (including
/// an unexpected end of file when it is shorter than five bytes), or an
/// `InvalidData` error when the file does not start with the ELF magic number.
fn parse_elf_header(path: &Path) -> io::Result<u8> {
    // 0x7F followed by the ASCII characters `ELF`.
    const ELF_MAGIC: &[u8] = b"\x7fELF";

    let mut header = [0u8; 5];
    File::open(path)?.read_exact(&mut header)?;

    let (magic, rest) = header.split_at(ELF_MAGIC.len());
    if magic != ELF_MAGIC {
        return Err(Error::new(ErrorKind::InvalidData, "invalid ELF header"));
    }

    // The class byte immediately follows the magic number.
    Ok(rest[0])
}
| 25 | |
/// Extracts the magic number from the PE header in a shared library.
///
/// The file offset of the PE header is read from byte `0x3C` of the file at
/// `path`. The four-byte signature found at that offset must be `PE\0\0`; the
/// two-byte little-endian magic number is then read 20 bytes after the end of
/// the signature and returned.
///
/// # Errors
///
/// Returns any I/O error raised while opening, seeking in, or reading the file
/// (including an unexpected end of file when the stored offset points past the
/// end of the file), or an `InvalidData` error when the signature is wrong.
fn parse_pe_header(path: &Path) -> io::Result<u16> {
    let mut file = File::open(path)?;

    // Extract the header offset.
    //
    // The offset is decoded as an unsigned value and widened losslessly. A
    // signed decode followed by an `as u64` cast would sign-extend values with
    // the high bit set into an enormous offset that the operating system
    // rejects with a platform-specific seek error.
    let mut buffer = [0u8; 4];
    file.seek(SeekFrom::Start(0x3C))?;
    file.read_exact(&mut buffer)?;
    let offset = u64::from(u32::from_le_bytes(buffer));

    // Check the validity of the header.
    file.seek(SeekFrom::Start(offset))?;
    file.read_exact(&mut buffer)?;
    if buffer != *b"PE\0\0" {
        return Err(Error::new(ErrorKind::InvalidData, "invalid PE header"));
    }

    // Extract the magic number, which sits 20 bytes past the signature.
    let mut magic = [0u8; 2];
    file.seek(SeekFrom::Current(20))?;
    file.read_exact(&mut magic)?;
    Ok(u16::from_le_bytes(magic))
}
| 50 | |
David LeGare | 82e2b17 | 2022-03-01 18:53:05 +0000 | [diff] [blame] | 51 | /// Checks that a `libclang` shared library matches the target platform. |
| 52 | fn validate_library(path: &Path) -> Result<(), String> { |
| 53 | if cfg!(any(target_os = "linux", target_os = "freebsd")) { |
Chih-Hung Hsieh | fab4380 | 2020-04-07 14:24:01 -0700 | [diff] [blame] | 54 | let class = parse_elf_header(path).map_err(|e| e.to_string())?; |
| 55 | |
| 56 | if cfg!(target_pointer_width = "32") && class != 1 { |
| 57 | return Err("invalid ELF class (64-bit)".into()); |
| 58 | } |
| 59 | |
| 60 | if cfg!(target_pointer_width = "64") && class != 2 { |
| 61 | return Err("invalid ELF class (32-bit)".into()); |
| 62 | } |
| 63 | |
| 64 | Ok(()) |
| 65 | } else if cfg!(target_os = "windows") { |
| 66 | let magic = parse_pe_header(path).map_err(|e| e.to_string())?; |
| 67 | |
| 68 | if cfg!(target_pointer_width = "32") && magic != 267 { |
| 69 | return Err("invalid DLL (64-bit)".into()); |
| 70 | } |
| 71 | |
| 72 | if cfg!(target_pointer_width = "64") && magic != 523 { |
| 73 | return Err("invalid DLL (32-bit)".into()); |
| 74 | } |
| 75 | |
| 76 | Ok(()) |
| 77 | } else { |
| 78 | Ok(()) |
| 79 | } |
| 80 | } |
| 81 | |
David LeGare | 82e2b17 | 2022-03-01 18:53:05 +0000 | [diff] [blame] | 82 | //================================================ |
| 83 | // Searching |
| 84 | //================================================ |
| 85 | |
/// Extracts the version components in a `libclang` shared library filename.
///
/// Two naming schemes are recognized:
///
/// - `libclang.so.<version>` (e.g., `libclang.so.7.0` yields `[7, 0]`)
/// - `libclang-<version>.so` (e.g., `libclang-3.9.so` yields `[3, 9]`)
///
/// The version is split on `.` and every component that is not a valid number
/// is recorded as `0`. An empty list is returned for filenames that match
/// neither scheme, and for `libclang-` filenames from which the trailing three
/// bytes cannot be removed (too short, or the cut would land inside a
/// multi-byte character).
fn parse_version(filename: &str) -> Vec<u32> {
    let version = if let Some(version) = filename.strip_prefix("libclang.so.") {
        version
    } else if let Some(rest) = filename.strip_prefix("libclang-") {
        // Drop the trailing three bytes (the `.so` in `libclang-3.9.so`). The
        // slicing is checked so that unexpected filenames yield "no version"
        // instead of a panic.
        match rest.len().checked_sub(3).and_then(|end| rest.get(..end)) {
            Some(version) => version,
            None => return vec![],
        }
    } else {
        return vec![];
    };

    version.split('.').map(|s| s.parse().unwrap_or(0)).collect()
}
| 98 | |
David LeGare | 82e2b17 | 2022-03-01 18:53:05 +0000 | [diff] [blame] | 99 | /// Finds `libclang` shared libraries and returns the paths to, filenames of, |
| 100 | /// and versions of those shared libraries. |
Chih-Hung Hsieh | fab4380 | 2020-04-07 14:24:01 -0700 | [diff] [blame] | 101 | fn search_libclang_directories(runtime: bool) -> Result<Vec<(PathBuf, String, Vec<u32>)>, String> { |
| 102 | let mut files = vec![format!( |
| 103 | "{}clang{}", |
| 104 | env::consts::DLL_PREFIX, |
| 105 | env::consts::DLL_SUFFIX |
| 106 | )]; |
| 107 | |
| 108 | if cfg!(target_os = "linux") { |
| 109 | // Some Linux distributions don't create a `libclang.so` symlink, so we |
| 110 | // need to look for versioned files (e.g., `libclang-3.9.so`). |
| 111 | files.push("libclang-*.so".into()); |
| 112 | |
| 113 | // Some Linux distributions don't create a `libclang.so` symlink and |
| 114 | // don't have versioned files as described above, so we need to look for |
| 115 | // suffix versioned files (e.g., `libclang.so.1`). However, `ld` cannot |
| 116 | // link to these files, so this will only be included when linking at |
| 117 | // runtime. |
| 118 | if runtime { |
| 119 | files.push("libclang.so.*".into()); |
| 120 | files.push("libclang-*.so.*".into()); |
| 121 | } |
| 122 | } |
| 123 | |
| 124 | if cfg!(any( |
Chih-Hung Hsieh | fab4380 | 2020-04-07 14:24:01 -0700 | [diff] [blame] | 125 | target_os = "freebsd", |
David LeGare | 82e2b17 | 2022-03-01 18:53:05 +0000 | [diff] [blame] | 126 | target_os = "haiku", |
| 127 | target_os = "netbsd", |
| 128 | target_os = "openbsd", |
Chih-Hung Hsieh | fab4380 | 2020-04-07 14:24:01 -0700 | [diff] [blame] | 129 | )) { |
| 130 | // Some BSD distributions don't create a `libclang.so` symlink either, |
| 131 | // but use a different naming scheme for versioned files (e.g., |
| 132 | // `libclang.so.7.0`). |
| 133 | files.push("libclang.so.*".into()); |
| 134 | } |
| 135 | |
| 136 | if cfg!(target_os = "windows") { |
| 137 | // The official LLVM build uses `libclang.dll` on Windows instead of |
| 138 | // `clang.dll`. However, unofficial builds such as MinGW use `clang.dll`. |
| 139 | files.push("libclang.dll".into()); |
| 140 | } |
| 141 | |
David LeGare | 82e2b17 | 2022-03-01 18:53:05 +0000 | [diff] [blame] | 142 | // Find and validate `libclang` shared libraries and collect the versions. |
Chih-Hung Hsieh | fab4380 | 2020-04-07 14:24:01 -0700 | [diff] [blame] | 143 | let mut valid = vec![]; |
| 144 | let mut invalid = vec![]; |
| 145 | for (directory, filename) in common::search_libclang_directories(&files, "LIBCLANG_PATH") { |
| 146 | let path = directory.join(&filename); |
David LeGare | 82e2b17 | 2022-03-01 18:53:05 +0000 | [diff] [blame] | 147 | match validate_library(&path) { |
Chih-Hung Hsieh | fab4380 | 2020-04-07 14:24:01 -0700 | [diff] [blame] | 148 | Ok(()) => { |
| 149 | let version = parse_version(&filename); |
| 150 | valid.push((directory, filename, version)) |
| 151 | } |
| 152 | Err(message) => invalid.push(format!("({}: {})", path.display(), message)), |
| 153 | } |
| 154 | } |
| 155 | |
| 156 | if !valid.is_empty() { |
| 157 | return Ok(valid); |
| 158 | } |
| 159 | |
| 160 | let message = format!( |
| 161 | "couldn't find any valid shared libraries matching: [{}], set the \ |
| 162 | `LIBCLANG_PATH` environment variable to a path where one of these files \ |
| 163 | can be found (invalid: [{}])", |
| 164 | files |
| 165 | .iter() |
| 166 | .map(|f| format!("'{}'", f)) |
| 167 | .collect::<Vec<_>>() |
| 168 | .join(", "), |
| 169 | invalid.join(", "), |
| 170 | ); |
| 171 | |
| 172 | Err(message) |
| 173 | } |
| 174 | |
David LeGare | 82e2b17 | 2022-03-01 18:53:05 +0000 | [diff] [blame] | 175 | /// Finds the "best" `libclang` shared library and returns the directory and |
| 176 | /// filename of that library. |
Chih-Hung Hsieh | fab4380 | 2020-04-07 14:24:01 -0700 | [diff] [blame] | 177 | pub fn find(runtime: bool) -> Result<(PathBuf, String), String> { |
| 178 | search_libclang_directories(runtime)? |
| 179 | .iter() |
Haibo Huang | 1de920c | 2020-11-17 03:54:06 -0800 | [diff] [blame] | 180 | // We want to find the `libclang` shared library with the highest |
| 181 | // version number, hence `max_by_key` below. |
| 182 | // |
| 183 | // However, in the case where there are multiple such `libclang` shared |
| 184 | // libraries, we want to use the order in which they appeared in the |
| 185 | // list returned by `search_libclang_directories` as a tiebreaker since |
| 186 | // that function returns `libclang` shared libraries in descending order |
| 187 | // of preference by how they were found. |
| 188 | // |
| 189 | // `max_by_key`, perhaps surprisingly, returns the *last* item with the |
| 190 | // maximum key rather than the first which results in the opposite of |
| 191 | // the tiebreaking behavior we want. This is easily fixed by reversing |
| 192 | // the list first. |
| 193 | .rev() |
Chih-Hung Hsieh | fab4380 | 2020-04-07 14:24:01 -0700 | [diff] [blame] | 194 | .max_by_key(|f| &f.2) |
| 195 | .cloned() |
| 196 | .map(|(path, filename, _)| (path, filename)) |
| 197 | .ok_or_else(|| "unreachable".into()) |
| 198 | } |
| 199 | |
David LeGare | 82e2b17 | 2022-03-01 18:53:05 +0000 | [diff] [blame] | 200 | //================================================ |
| 201 | // Linking |
| 202 | //================================================ |
| 203 | |
| 204 | /// Finds and links to a `libclang` shared library. |
Chih-Hung Hsieh | fab4380 | 2020-04-07 14:24:01 -0700 | [diff] [blame] | 205 | #[cfg(not(feature = "runtime"))] |
| 206 | pub fn link() { |
Haibo Huang | 8b9513e | 2020-07-13 22:05:39 -0700 | [diff] [blame] | 207 | let cep = common::CommandErrorPrinter::default(); |
| 208 | |
Chih-Hung Hsieh | fab4380 | 2020-04-07 14:24:01 -0700 | [diff] [blame] | 209 | use std::fs; |
| 210 | |
| 211 | let (directory, filename) = find(false).unwrap(); |
| 212 | println!("cargo:rustc-link-search={}", directory.display()); |
| 213 | |
| 214 | if cfg!(all(target_os = "windows", target_env = "msvc")) { |
| 215 | // Find the `libclang` stub static library required for the MSVC |
| 216 | // toolchain. |
| 217 | let lib = if !directory.ends_with("bin") { |
| 218 | directory |
| 219 | } else { |
| 220 | directory.parent().unwrap().join("lib") |
| 221 | }; |
| 222 | |
| 223 | if lib.join("libclang.lib").exists() { |
| 224 | println!("cargo:rustc-link-search={}", lib.display()); |
| 225 | } else if lib.join("libclang.dll.a").exists() { |
| 226 | // MSYS and MinGW use `libclang.dll.a` instead of `libclang.lib`. |
| 227 | // It is linkable with the MSVC linker, but Rust doesn't recognize |
| 228 | // the `.a` suffix, so we need to copy it with a different name. |
| 229 | // |
| 230 | // FIXME: Maybe we can just hardlink or symlink it? |
| 231 | let out = env::var("OUT_DIR").unwrap(); |
| 232 | fs::copy( |
| 233 | lib.join("libclang.dll.a"), |
| 234 | Path::new(&out).join("libclang.lib"), |
| 235 | ) |
| 236 | .unwrap(); |
| 237 | println!("cargo:rustc-link-search=native={}", out); |
| 238 | } else { |
| 239 | panic!( |
| 240 | "using '{}', so 'libclang.lib' or 'libclang.dll.a' must be \ |
| 241 | available in {}", |
| 242 | filename, |
| 243 | lib.display(), |
| 244 | ); |
| 245 | } |
| 246 | |
| 247 | println!("cargo:rustc-link-lib=dylib=libclang"); |
| 248 | } else { |
| 249 | let name = filename.trim_start_matches("lib"); |
| 250 | |
| 251 | // Strip extensions and trailing version numbers (e.g., the `.so.7.0` in |
| 252 | // `libclang.so.7.0`). |
| 253 | let name = match name.find(".dylib").or_else(|| name.find(".so")) { |
| 254 | Some(index) => &name[0..index], |
Joel Galenson | eabe835 | 2021-09-22 10:52:39 -0700 | [diff] [blame] | 255 | None => name, |
Chih-Hung Hsieh | fab4380 | 2020-04-07 14:24:01 -0700 | [diff] [blame] | 256 | }; |
| 257 | |
| 258 | println!("cargo:rustc-link-lib=dylib={}", name); |
| 259 | } |
Haibo Huang | 8b9513e | 2020-07-13 22:05:39 -0700 | [diff] [blame] | 260 | |
| 261 | cep.discard(); |
Chih-Hung Hsieh | fab4380 | 2020-04-07 14:24:01 -0700 | [diff] [blame] | 262 | } |