run.rs 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617
  1. use std::{
  2. ffi::OsString,
  3. fmt::Write as _,
  4. fs::{OpenOptions, copy, create_dir_all},
  5. io::{BufRead as _, BufReader, Write as _},
  6. ops::Deref as _,
  7. path::{Path, PathBuf},
  8. process::{Child, ChildStdin, Command, Output, Stdio},
  9. sync::{Arc, Mutex},
  10. thread,
  11. };
  12. use anyhow::{Context as _, Result, anyhow, bail};
  13. use cargo_metadata::{Artifact, CompilerMessage, Message, Target};
  14. use clap::Parser;
  15. use walkdir::WalkDir;
  16. use xtask::{AYA_BUILD_INTEGRATION_BPF, Errors};
  17. #[derive(Parser)]
  18. enum Environment {
  19. /// Runs the integration tests locally.
  20. Local {
  21. /// The command used to wrap your application.
  22. #[clap(short, long, default_value = "sudo -E")]
  23. runner: String,
  24. },
  25. /// Runs the integration tests in a VM.
  26. VM {
  27. /// The cache directory in which to store intermediate artifacts.
  28. #[clap(long)]
  29. cache_dir: PathBuf,
  30. /// The Github API token to use if network requests to Github are made.
  31. ///
  32. /// This may be required if Github rate limits are exceeded.
  33. #[clap(long)]
  34. github_api_token: Option<String>,
  35. /// The kernel image and modules to use.
  36. ///
  37. /// Format: </path/to/image/vmlinuz>:</path/to/lib/modules>
  38. ///
  39. /// You can download some images with:
  40. ///
  41. /// wget --accept-regex '.*/linux-image-[0-9\.-]+-cloud-.*-unsigned*' \
  42. /// --recursive http://ftp.us.debian.org/debian/pool/main/l/linux/
  43. ///
  44. /// You can then extract the images and kernel modules with:
  45. ///
  46. /// find . -name '*.deb' -print0 \
  47. /// | xargs -0 -I {} sh -c "dpkg --fsys-tarfile {} \
  48. /// | tar --wildcards --extract '**/boot/*' '**/modules/*' --file -"
  49. ///
  50. /// `**/boot/*` is used to extract the kernel image and config.
  51. ///
  52. /// `**/modules/*` is used to extract the kernel modules.
  53. ///
  54. /// Modules are required since not all parts of the kernel we want to
  55. /// test are built-in.
  56. #[clap(required = true, value_parser=parse_image_and_modules)]
  57. image_and_modules: Vec<(PathBuf, PathBuf)>,
  58. },
  59. }
  /// Parses an `IMAGE:MODULES` command-line value into a pair of paths.
  ///
  /// The value must contain exactly one `:` separating a kernel image path from
  /// a kernel modules directory path, and neither side may be empty.
  ///
  /// # Errors
  ///
  /// Returns an [`std::io::Error`] of kind [`std::io::ErrorKind::InvalidInput`]
  /// when the separator is missing, appears more than once, or either path is
  /// empty. The error message states which rule was violated.
  pub(crate) fn parse_image_and_modules(s: &str) -> Result<(PathBuf, PathBuf), std::io::Error> {
      // All failures share one error kind; only the explanation differs.
      let invalid = |reason: &str| {
          std::io::Error::new(
              std::io::ErrorKind::InvalidInput,
              format!("{reason}; expected IMAGE:MODULES, got {s:?}"),
          )
      };

      let (image, modules) = s
          .split_once(':')
          .ok_or_else(|| invalid("missing ':' separator"))?;
      if modules.contains(':') {
          return Err(invalid("more than one ':' separator"));
      }
      // An empty path can never name a kernel image or a modules directory, so
      // reject it here rather than failing later with a confusing error.
      if image.is_empty() {
          return Err(invalid("empty kernel image path"));
      }
      if modules.is_empty() {
          return Err(invalid("empty modules path"));
      }
      Ok((PathBuf::from(image), PathBuf::from(modules)))
  }
  75. #[derive(Parser)]
  76. pub struct Options {
  77. #[clap(subcommand)]
  78. environment: Environment,
  79. /// Arguments to pass to your application.
  80. #[clap(global = true, last = true)]
  81. run_args: Vec<OsString>,
  82. }
  83. pub fn build<F>(target: Option<&str>, f: F) -> Result<Vec<(String, PathBuf)>>
  84. where
  85. F: FnOnce(&mut Command) -> &mut Command,
  86. {
  87. // Always use rust-lld in case we're cross-compiling.
  88. let mut cmd = Command::new("cargo");
  89. cmd.args(["build", "--message-format=json"]);
  90. if let Some(target) = target {
  91. cmd.args(["--target", target]);
  92. }
  93. f(&mut cmd);
  94. let mut child = cmd
  95. .stdout(Stdio::piped())
  96. .spawn()
  97. .with_context(|| format!("failed to spawn {cmd:?}"))?;
  98. let Child { stdout, .. } = &mut child;
  99. let stdout = stdout.take().unwrap();
  100. let stdout = BufReader::new(stdout);
  101. let mut executables = Vec::new();
  102. for message in Message::parse_stream(stdout) {
  103. #[expect(clippy::collapsible_match)]
  104. match message.context("valid JSON")? {
  105. Message::CompilerArtifact(Artifact {
  106. executable,
  107. target: Target { name, .. },
  108. ..
  109. }) => {
  110. if let Some(executable) = executable {
  111. executables.push((name, executable.into()));
  112. }
  113. }
  114. Message::CompilerMessage(CompilerMessage { message, .. }) => {
  115. for line in message.rendered.unwrap_or_default().split('\n') {
  116. println!("cargo:warning={line}");
  117. }
  118. }
  119. Message::TextLine(line) => {
  120. println!("{line}");
  121. }
  122. _ => {}
  123. }
  124. }
  125. let status = child
  126. .wait()
  127. .with_context(|| format!("failed to wait for {cmd:?}"))?;
  128. if status.code() != Some(0) {
  129. bail!("{cmd:?} failed: {status:?}")
  130. }
  131. Ok(executables)
  132. }
  133. /// Build and run the project.
  134. pub fn run(opts: Options) -> Result<()> {
  135. let Options {
  136. environment,
  137. run_args,
  138. } = opts;
  139. type Binary = (String, PathBuf);
  140. fn binaries(target: Option<&str>) -> Result<Vec<(&str, Vec<Binary>)>> {
  141. ["dev", "release"]
  142. .into_iter()
  143. .map(|profile| {
  144. let binaries = build(target, |cmd| {
  145. cmd.env(AYA_BUILD_INTEGRATION_BPF, "true").args([
  146. "--package",
  147. "integration-test",
  148. "--tests",
  149. "--profile",
  150. profile,
  151. ])
  152. })?;
  153. anyhow::Ok((profile, binaries))
  154. })
  155. .collect()
  156. }
  157. // Use --test-threads=1 to prevent tests from interacting with shared
  158. // kernel state due to the lack of inter-test isolation.
  159. let default_args = [OsString::from("--test-threads=1")];
  160. let run_args = default_args.iter().chain(run_args.iter());
  161. match environment {
  162. Environment::Local { runner } => {
  163. let mut args = runner.trim().split_terminator(' ');
  164. let runner = args.next().ok_or(anyhow!("no first argument"))?;
  165. let args = args.collect::<Vec<_>>();
  166. let binaries = binaries(None)?;
  167. let mut failures = String::new();
  168. for (profile, binaries) in binaries {
  169. for (name, binary) in binaries {
  170. let mut cmd = Command::new(runner);
  171. cmd.args(args.iter())
  172. .arg(binary)
  173. .args(run_args.clone())
  174. .env("RUST_BACKTRACE", "1")
  175. .env("RUST_LOG", "debug");
  176. println!("{profile}:{name} running {cmd:?}");
  177. let status = cmd
  178. .status()
  179. .with_context(|| format!("failed to run {cmd:?}"))?;
  180. if status.code() != Some(0) {
  181. writeln!(&mut failures, "{profile}:{name} failed: {status:?}")
  182. .context("String write failed")?
  183. }
  184. }
  185. }
  186. if failures.is_empty() {
  187. Ok(())
  188. } else {
  189. Err(anyhow!("failures:\n{}", failures))
  190. }
  191. }
  192. Environment::VM {
  193. cache_dir,
  194. github_api_token,
  195. image_and_modules,
  196. } => {
  197. // The user has asked us to run the tests on a VM. This is involved; strap in.
  198. //
  199. // We need tools to build the initramfs; we use gen_init_cpio from the Linux repository,
  200. // taking care to cache it.
  201. //
  202. // Then we iterate the kernel images, using the `file` program to guess the target
  203. // architecture. We then build the init program and our test binaries for that
  204. // architecture, and use gen_init_cpio to build an initramfs containing the test
  205. // binaries. We're almost ready to run the VM.
  206. //
  207. // We consult our OS, our architecture, and the target architecture to determine if
  208. // hardware acceleration is available, and then start QEMU with the provided kernel
  209. // image and the initramfs we built.
  210. //
  211. // We consume the output of QEMU, looking for the output of our init program. This is
  212. // the only way to distinguish success from failure. We batch up the errors across all
  213. // VM images and report to the user. The end.
  214. create_dir_all(&cache_dir).context("failed to create cache dir")?;
  215. let gen_init_cpio = cache_dir.join("gen_init_cpio");
  216. let etag_path = cache_dir.join("gen_init_cpio.etag");
  217. {
  218. let gen_init_cpio_exists = gen_init_cpio.try_exists().with_context(|| {
  219. format!("failed to check existence of {}", gen_init_cpio.display())
  220. })?;
  221. let etag_path_exists = etag_path.try_exists().with_context(|| {
  222. format!("failed to check existence of {}", etag_path.display())
  223. })?;
  224. if !gen_init_cpio_exists && etag_path_exists {
  225. println!(
  226. "cargo:warning=({}).exists()={} != ({})={} (mismatch)",
  227. gen_init_cpio.display(),
  228. gen_init_cpio_exists,
  229. etag_path.display(),
  230. etag_path_exists,
  231. )
  232. }
  233. }
  234. let gen_init_cpio_source = {
  235. drop(github_api_token); // Currently unused, but kept around in case we need it in the future.
  236. let mut curl = Command::new("curl");
  237. curl.args([
  238. "-sfSL",
  239. "https://raw.githubusercontent.com/torvalds/linux/master/usr/gen_init_cpio.c",
  240. ]);
  241. for arg in ["--etag-compare", "--etag-save"] {
  242. curl.arg(arg).arg(&etag_path);
  243. }
  244. let Output {
  245. status,
  246. stdout,
  247. stderr,
  248. } = curl
  249. .output()
  250. .with_context(|| format!("failed to run {curl:?}"))?;
  251. if status.code() != Some(0) {
  252. bail!("{curl:?} failed: stdout={stdout:?} stderr={stderr:?}")
  253. }
  254. stdout
  255. };
  256. if !gen_init_cpio_source.is_empty() {
  257. let mut clang = Command::new("clang");
  258. clang
  259. .args(["-g", "-O2", "-x", "c", "-", "-o"])
  260. .arg(&gen_init_cpio)
  261. .stdin(Stdio::piped());
  262. let mut clang_child = clang
  263. .spawn()
  264. .with_context(|| format!("failed to spawn {clang:?}"))?;
  265. let Child { stdin, .. } = &mut clang_child;
  266. let mut stdin = stdin.take().unwrap();
  267. stdin
  268. .write_all(&gen_init_cpio_source)
  269. .with_context(|| format!("failed to write to {clang:?} stdin"))?;
  270. drop(stdin); // Must explicitly close to signal EOF.
  271. let output = clang_child
  272. .wait_with_output()
  273. .with_context(|| format!("failed to wait for {clang:?}"))?;
  274. let Output { status, .. } = &output;
  275. if status.code() != Some(0) {
  276. bail!("{clang:?} failed: {output:?}")
  277. }
  278. }
  279. let mut errors = Vec::new();
  280. for (kernel_image, modules_dir) in image_and_modules {
  281. // Guess the guest architecture.
  282. let mut cmd = Command::new("file");
  283. let output = cmd
  284. .arg("--brief")
  285. .arg(&kernel_image)
  286. .output()
  287. .with_context(|| format!("failed to run {cmd:?}"))?;
  288. let Output { status, .. } = &output;
  289. if status.code() != Some(0) {
  290. bail!("{cmd:?} failed: {output:?}")
  291. }
  292. let Output { stdout, .. } = output;
  293. // Now parse the output of the file command, which looks something like
  294. //
  295. // - Linux kernel ARM64 boot executable Image, little-endian, 4K pages
  296. //
  297. // - Linux kernel x86 boot executable bzImage, version 6.1.0-10-cloud-amd64 [..]
  298. let stdout = String::from_utf8(stdout)
  299. .with_context(|| format!("invalid UTF-8 in {cmd:?} stdout"))?;
  300. let (_, stdout) = stdout
  301. .split_once("Linux kernel")
  302. .ok_or_else(|| anyhow!("failed to parse {cmd:?} stdout: {stdout}"))?;
  303. let (guest_arch, _) = stdout
  304. .split_once("boot executable")
  305. .ok_or_else(|| anyhow!("failed to parse {cmd:?} stdout: {stdout}"))?;
  306. let guest_arch = guest_arch.trim();
  307. let (guest_arch, machine, cpu, console) = match guest_arch {
  308. "ARM64" => ("aarch64", Some("virt"), Some("max"), "ttyAMA0"),
  309. "x86" => ("x86_64", None, None, "ttyS0"),
  310. guest_arch => (guest_arch, None, None, "ttyS0"),
  311. };
  312. let target = format!("{guest_arch}-unknown-linux-musl");
  313. let test_distro_args =
  314. ["--package", "test-distro", "--release", "--features", "xz2"];
  315. let test_distro: Vec<(String, PathBuf)> =
  316. build(Some(&target), |cmd| cmd.args(test_distro_args))
  317. .context("building test-distro package failed")?;
  318. let binaries = binaries(Some(&target))?;
  319. let tmp_dir = tempfile::tempdir().context("tempdir failed")?;
  320. let initrd_image = tmp_dir.path().join("qemu-initramfs.img");
  321. let initrd_image_file = OpenOptions::new()
  322. .create_new(true)
  323. .write(true)
  324. .open(&initrd_image)
  325. .with_context(|| {
  326. format!("failed to create {} for writing", initrd_image.display())
  327. })?;
  328. let mut gen_init_cpio = Command::new(&gen_init_cpio);
  329. let mut gen_init_cpio_child = gen_init_cpio
  330. .arg("-")
  331. .stdin(Stdio::piped())
  332. .stdout(initrd_image_file)
  333. .spawn()
  334. .with_context(|| format!("failed to spawn {gen_init_cpio:?}"))?;
  335. let Child { stdin, .. } = &mut gen_init_cpio_child;
  336. let stdin = Arc::new(stdin.take().unwrap());
  337. use std::os::unix::ffi::OsStrExt as _;
  338. // Send input into gen_init_cpio for directories
  339. //
  340. // dir /bin 755 0 0
  341. let write_dir = |out_path: &Path| {
  342. for bytes in [
  343. "dir ".as_bytes(),
  344. out_path.as_os_str().as_bytes(),
  345. " ".as_bytes(),
  346. "755 0 0\n".as_bytes(),
  347. ] {
  348. stdin.deref().write_all(bytes).expect("write");
  349. }
  350. };
  351. // Send input into gen_init_cpio for files
  352. //
  353. // file /init path-to-init 755 0 0
  354. let write_file = |out_path: &Path, in_path: &Path, mode: &str| {
  355. for bytes in [
  356. "file ".as_bytes(),
  357. out_path.as_os_str().as_bytes(),
  358. " ".as_bytes(),
  359. in_path.as_os_str().as_bytes(),
  360. " ".as_bytes(),
  361. mode.as_bytes(),
  362. "\n".as_bytes(),
  363. ] {
  364. stdin.deref().write_all(bytes).expect("write");
  365. }
  366. };
  367. write_dir(Path::new("/bin"));
  368. write_dir(Path::new("/sbin"));
  369. write_dir(Path::new("/lib"));
  370. write_dir(Path::new("/lib/modules"));
  371. test_distro.iter().for_each(|(name, path)| {
  372. if name == "init" {
  373. write_file(Path::new("/init"), path, "755 0 0");
  374. } else {
  375. write_file(&Path::new("/sbin").join(name), path, "755 0 0");
  376. }
  377. });
  378. // At this point we need to make a slight detour!
  379. // Preparing the `modules.alias` file inside the VM as part of
  380. // `/init` is slow. It's faster to prepare it here.
  381. Command::new("cargo")
  382. .arg("run")
  383. .args(test_distro_args)
  384. .args(["--bin", "depmod", "--", "-b"])
  385. .arg(&modules_dir)
  386. .status()
  387. .context("failed to run depmod")?;
  388. // Now our modules.alias file is built, we can recursively
  389. // walk the modules directory and add all the files to the
  390. // initramfs.
  391. for entry in WalkDir::new(&modules_dir) {
  392. let entry = entry.context("read_dir failed")?;
  393. let path = entry.path();
  394. let metadata = entry.metadata().context("metadata failed")?;
  395. let out_path = Path::new("/lib/modules").join(
  396. path.strip_prefix(&modules_dir).with_context(|| {
  397. format!(
  398. "strip prefix {} failed for {}",
  399. path.display(),
  400. modules_dir.display()
  401. )
  402. })?,
  403. );
  404. if metadata.file_type().is_dir() {
  405. write_dir(&out_path);
  406. } else if metadata.file_type().is_file() {
  407. write_file(&out_path, path, "644 0 0");
  408. }
  409. }
  410. for (profile, binaries) in binaries {
  411. for (name, binary) in binaries {
  412. let name = format!("{profile}-{name}");
  413. let path = tmp_dir.path().join(&name);
  414. copy(&binary, &path).with_context(|| {
  415. format!("copy({}, {}) failed", binary.display(), path.display())
  416. })?;
  417. let out_path = Path::new("/bin").join(&name);
  418. write_file(&out_path, &path, "755 0 0");
  419. }
  420. }
  421. // Must explicitly close to signal EOF.
  422. drop(stdin);
  423. let output = gen_init_cpio_child
  424. .wait_with_output()
  425. .with_context(|| format!("failed to wait for {gen_init_cpio:?}"))?;
  426. let Output { status, .. } = &output;
  427. if status.code() != Some(0) {
  428. bail!("{gen_init_cpio:?} failed: {output:?}")
  429. }
  430. let mut qemu = Command::new(format!("qemu-system-{guest_arch}"));
  431. if let Some(machine) = machine {
  432. qemu.args(["-machine", machine]);
  433. }
  434. if let Some(cpu) = cpu {
  435. qemu.args(["-cpu", cpu]);
  436. }
  437. for accel in ["kvm", "hvf", "tcg"] {
  438. qemu.args(["-accel", accel]);
  439. }
  440. let console = OsString::from(console);
  441. let mut kernel_args = std::iter::once(("console", &console))
  442. .chain(run_args.clone().map(|run_arg| ("init.arg", run_arg)))
  443. .enumerate()
  444. .fold(OsString::new(), |mut acc, (i, (k, v))| {
  445. if i != 0 {
  446. acc.push(" ");
  447. }
  448. acc.push(k);
  449. acc.push("=");
  450. acc.push(v);
  451. acc
  452. });
  453. // We sometimes see kernel panics containing:
  454. //
  455. // [ 0.064000] Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug and send a report. Then try booting with the 'noapic' option.
  456. //
  457. // Heed the advice and boot with noapic. We don't know why this happens.
  458. kernel_args.push(" noapic");
  459. qemu.args(["-no-reboot", "-nographic", "-m", "512M", "-smp", "2"])
  460. .arg("-append")
  461. .arg(kernel_args)
  462. .arg("-kernel")
  463. .arg(&kernel_image)
  464. .arg("-initrd")
  465. .arg(&initrd_image);
  466. let mut qemu_child = qemu
  467. .stdin(Stdio::piped())
  468. .stdout(Stdio::piped())
  469. .stderr(Stdio::piped())
  470. .spawn()
  471. .with_context(|| format!("failed to spawn {qemu:?}"))?;
  472. let Child {
  473. stdin,
  474. stdout,
  475. stderr,
  476. ..
  477. } = &mut qemu_child;
  478. let stdin = stdin.take().unwrap();
  479. let stdin = Arc::new(Mutex::new(stdin));
  480. let stdout = stdout.take().unwrap();
  481. let stdout = BufReader::new(stdout);
  482. let stderr = stderr.take().unwrap();
  483. let stderr = BufReader::new(stderr);
  484. const TERMINATE_AFTER_COUNT: &[(&str, usize)] = &[
  485. ("end Kernel panic", 0),
  486. ("rcu: RCU grace-period kthread stack dump:", 0),
  487. ("watchdog: BUG: soft lockup", 1),
  488. ];
  489. let mut counts = [0; TERMINATE_AFTER_COUNT.len()];
  490. let mut terminate_if_kernel_hang =
  491. move |line: &str, stdin: &Arc<Mutex<ChildStdin>>| -> anyhow::Result<()> {
  492. if let Some(i) = TERMINATE_AFTER_COUNT
  493. .iter()
  494. .position(|(marker, _)| line.contains(marker))
  495. {
  496. counts[i] += 1;
  497. let (marker, max) = TERMINATE_AFTER_COUNT[i];
  498. if counts[i] > max {
  499. println!("{marker} detected > {max} times; terminating QEMU");
  500. let mut stdin = stdin.lock().unwrap();
  501. stdin
  502. .write_all(&[0x01, b'x'])
  503. .context("failed to write to stdin")?;
  504. println!("waiting for QEMU to terminate");
  505. }
  506. }
  507. Ok(())
  508. };
  509. let stderr = {
  510. let stdin = stdin.clone();
  511. thread::Builder::new()
  512. .spawn(move || {
  513. for line in stderr.lines() {
  514. let line = line.context("failed to read line from stderr")?;
  515. eprintln!("{line}");
  516. terminate_if_kernel_hang(&line, &stdin)?;
  517. }
  518. anyhow::Ok(())
  519. })
  520. .unwrap()
  521. };
  522. let mut outcome = None;
  523. for line in stdout.lines() {
  524. let line = line.context("failed to read line from stdout")?;
  525. println!("{line}");
  526. terminate_if_kernel_hang(&line, &stdin)?;
  527. // The init program will print "init: success" or "init: failure" to indicate
  528. // the outcome of running the binaries it found in /bin.
  529. if let Some(line) = line.strip_prefix("init: ") {
  530. let previous = match line {
  531. "success" => outcome.replace(Ok(())),
  532. "failure" => outcome.replace(Err(())),
  533. line => bail!("unexpected init output: {}", line),
  534. };
  535. if let Some(previous) = previous {
  536. bail!("multiple exit status: previous={previous:?}, current={line}");
  537. }
  538. }
  539. }
  540. let output = qemu_child
  541. .wait_with_output()
  542. .with_context(|| format!("failed to wait for {qemu:?}"))?;
  543. let Output { status, .. } = &output;
  544. if status.code() != Some(0) {
  545. bail!("{qemu:?} failed: {output:?}")
  546. }
  547. stderr.join().unwrap()?;
  548. let outcome = outcome.ok_or(anyhow!("init did not exit"))?;
  549. match outcome {
  550. Ok(()) => {}
  551. Err(()) => {
  552. errors.push(anyhow!("VM binaries failed on {}", kernel_image.display()))
  553. }
  554. }
  555. }
  556. if errors.is_empty() {
  557. Ok(())
  558. } else {
  559. Err(Errors::new(errors).into())
  560. }
  561. }
  562. }
  563. }