run.rs 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551
  1. use std::{
  2. env::consts::{ARCH, OS},
  3. ffi::OsString,
  4. fmt::Write as _,
  5. fs::{copy, create_dir_all, OpenOptions},
  6. io::{BufRead as _, BufReader, ErrorKind, Write as _},
  7. path::{Path, PathBuf},
  8. process::{Child, ChildStdin, Command, Output, Stdio},
  9. sync::{Arc, Mutex},
  10. thread,
  11. };
  12. use anyhow::{anyhow, bail, Context as _, Result};
  13. use cargo_metadata::{Artifact, CompilerMessage, Message, Target};
  14. use clap::Parser;
  15. use xtask::{exec, Errors, AYA_BUILD_INTEGRATION_BPF};
  16. #[derive(Parser)]
  17. enum Environment {
  18. /// Runs the integration tests locally.
  19. Local {
  20. /// The command used to wrap your application.
  21. #[clap(short, long, default_value = "sudo -E")]
  22. runner: String,
  23. },
  24. /// Runs the integration tests in a VM.
  25. VM {
  26. /// The kernel images to use.
  27. ///
  28. /// You can download some images with:
  29. ///
  30. /// wget --accept-regex '.*/linux-image-[0-9\.-]+-cloud-.*-unsigned*' \
  31. /// --recursive ftp://ftp.us.debian.org/debian/pool/main/l/linux/
  32. ///
  33. /// You can then extract them with:
  34. ///
  35. /// find . -name '*.deb' -print0 \
  36. /// | xargs -0 -I {} sh -c "dpkg --fsys-tarfile {} \
  37. /// | tar --wildcards --extract '*vmlinuz*' --file -"
  38. #[clap(required = true)]
  39. kernel_image: Vec<PathBuf>,
  40. },
  41. }
  42. #[derive(Parser)]
  43. pub struct Options {
  44. #[clap(subcommand)]
  45. environment: Environment,
  46. /// Arguments to pass to your application.
  47. #[clap(global = true, last = true)]
  48. run_args: Vec<OsString>,
  49. }
  50. pub fn build<F>(target: Option<&str>, f: F) -> Result<Vec<(String, PathBuf)>>
  51. where
  52. F: FnOnce(&mut Command) -> &mut Command,
  53. {
  54. // Always use rust-lld and -Zbuild-std in case we're cross-compiling.
  55. let mut cmd = Command::new("cargo");
  56. cmd.args(["build", "--message-format=json"]);
  57. if let Some(target) = target {
  58. let config = format!("target.{target}.linker = \"rust-lld\"");
  59. cmd.args(["--target", target, "--config", &config]);
  60. }
  61. f(&mut cmd);
  62. let mut child = cmd
  63. .stdout(Stdio::piped())
  64. .spawn()
  65. .with_context(|| format!("failed to spawn {cmd:?}"))?;
  66. let Child { stdout, .. } = &mut child;
  67. let stdout = stdout.take().unwrap();
  68. let stdout = BufReader::new(stdout);
  69. let mut executables = Vec::new();
  70. for message in Message::parse_stream(stdout) {
  71. #[allow(clippy::collapsible_match)]
  72. match message.context("valid JSON")? {
  73. Message::CompilerArtifact(Artifact {
  74. executable,
  75. target: Target { name, .. },
  76. ..
  77. }) => {
  78. if let Some(executable) = executable {
  79. executables.push((name, executable.into()));
  80. }
  81. }
  82. Message::CompilerMessage(CompilerMessage { message, .. }) => {
  83. println!("{message}");
  84. }
  85. Message::TextLine(line) => {
  86. println!("{line}");
  87. }
  88. _ => {}
  89. }
  90. }
  91. let status = child
  92. .wait()
  93. .with_context(|| format!("failed to wait for {cmd:?}"))?;
  94. if status.code() != Some(0) {
  95. bail!("{cmd:?} failed: {status:?}")
  96. }
  97. Ok(executables)
  98. }
  99. /// Build and run the project.
  100. pub fn run(opts: Options) -> Result<()> {
  101. let Options {
  102. environment,
  103. run_args,
  104. } = opts;
  105. type Binary = (String, PathBuf);
  106. fn binaries(target: Option<&str>) -> Result<Vec<(&str, Vec<Binary>)>> {
  107. ["dev", "release"]
  108. .into_iter()
  109. .map(|profile| {
  110. let binaries = build(target, |cmd| {
  111. cmd.env(AYA_BUILD_INTEGRATION_BPF, "true").args([
  112. "--package",
  113. "integration-test",
  114. "--tests",
  115. "--profile",
  116. profile,
  117. ])
  118. })?;
  119. anyhow::Ok((profile, binaries))
  120. })
  121. .collect()
  122. }
  123. // Use --test-threads=1 to prevent tests from interacting with shared
  124. // kernel state due to the lack of inter-test isolation.
  125. let default_args = [OsString::from("--test-threads=1")];
  126. let run_args = default_args.iter().chain(run_args.iter());
  127. match environment {
  128. Environment::Local { runner } => {
  129. let mut args = runner.trim().split_terminator(' ');
  130. let runner = args.next().ok_or(anyhow!("no first argument"))?;
  131. let args = args.collect::<Vec<_>>();
  132. let binaries = binaries(None)?;
  133. let mut failures = String::new();
  134. for (profile, binaries) in binaries {
  135. for (name, binary) in binaries {
  136. let mut cmd = Command::new(runner);
  137. let cmd = cmd.args(args.iter()).arg(binary).args(run_args.clone());
  138. println!("{profile}:{name} running {cmd:?}");
  139. let status = cmd
  140. .status()
  141. .with_context(|| format!("failed to run {cmd:?}"))?;
  142. if status.code() != Some(0) {
  143. writeln!(&mut failures, "{profile}:{name} failed: {status:?}")
  144. .context("String write failed")?
  145. }
  146. }
  147. }
  148. if failures.is_empty() {
  149. Ok(())
  150. } else {
  151. Err(anyhow!("failures:\n{}", failures))
  152. }
  153. }
  154. Environment::VM { kernel_image } => {
  155. // The user has asked us to run the tests on a VM. This is involved; strap in.
  156. //
  157. // We need tools to build the initramfs; we use gen_init_cpio from the Linux repository,
  158. // taking care to cache it.
  159. //
  160. // Then we iterate the kernel images, using the `file` program to guess the target
  161. // architecture. We then build the init program and our test binaries for that
  162. // architecture, and use gen_init_cpio to build an initramfs containing the test
  163. // binaries. We're almost ready to run the VM.
  164. //
  165. // We consult our OS, our architecture, and the target architecture to determine if
  166. // hardware acceleration is available, and then start QEMU with the provided kernel
  167. // image and the initramfs we built.
  168. //
  169. // We consume the output of QEMU, looking for the output of our init program. This is
  170. // the only way to distinguish success from failure. We batch up the errors across all
  171. // VM images and report to the user. The end.
  172. let cache_dir = Path::new("test/.tmp");
  173. create_dir_all(cache_dir).context("failed to create cache dir")?;
  174. let gen_init_cpio = cache_dir.join("gen_init_cpio");
  175. if !gen_init_cpio
  176. .try_exists()
  177. .context("failed to check existence of gen_init_cpio")?
  178. {
  179. let mut curl = Command::new("curl");
  180. curl.args([
  181. "-sfSL",
  182. "https://raw.githubusercontent.com/torvalds/linux/master/usr/gen_init_cpio.c",
  183. ]);
  184. let mut curl_child = curl
  185. .stdout(Stdio::piped())
  186. .spawn()
  187. .with_context(|| format!("failed to spawn {curl:?}"))?;
  188. let Child { stdout, .. } = &mut curl_child;
  189. let curl_stdout = stdout.take().unwrap();
  190. let mut clang = Command::new("clang");
  191. let clang = exec(
  192. clang
  193. .args(["-g", "-O2", "-x", "c", "-", "-o"])
  194. .arg(&gen_init_cpio)
  195. .stdin(curl_stdout),
  196. );
  197. let output = curl_child
  198. .wait_with_output()
  199. .with_context(|| format!("failed to wait for {curl:?}"))?;
  200. let Output { status, .. } = &output;
  201. if status.code() != Some(0) {
  202. bail!("{curl:?} failed: {output:?}")
  203. }
  204. // Check the result of clang *after* checking curl; in case the download failed,
  205. // only curl's output will be useful.
  206. clang?;
  207. }
  208. let mut errors = Vec::new();
  209. for kernel_image in kernel_image {
  210. // Guess the guest architecture.
  211. let mut cmd = Command::new("file");
  212. let output = cmd
  213. .arg("--brief")
  214. .arg(&kernel_image)
  215. .output()
  216. .with_context(|| format!("failed to run {cmd:?}"))?;
  217. let Output { status, .. } = &output;
  218. if status.code() != Some(0) {
  219. bail!("{cmd:?} failed: {output:?}")
  220. }
  221. let Output { stdout, .. } = output;
  222. // Now parse the output of the file command, which looks something like
  223. //
  224. // - Linux kernel ARM64 boot executable Image, little-endian, 4K pages
  225. //
  226. // - Linux kernel x86 boot executable bzImage, version 6.1.0-10-cloud-amd64 [..]
  227. let stdout = String::from_utf8(stdout)
  228. .with_context(|| format!("invalid UTF-8 in {cmd:?} stdout"))?;
  229. let (_, stdout) = stdout
  230. .split_once("Linux kernel")
  231. .ok_or_else(|| anyhow!("failed to parse {cmd:?} stdout: {stdout}"))?;
  232. let (guest_arch, _) = stdout
  233. .split_once("boot executable")
  234. .ok_or_else(|| anyhow!("failed to parse {cmd:?} stdout: {stdout}"))?;
  235. let guest_arch = guest_arch.trim();
  236. let (guest_arch, machine, cpu) = match guest_arch {
  237. "ARM64" => ("aarch64", Some("virt"), Some("cortex-a57")),
  238. "x86" => ("x86_64", Some("q35"), Some("qemu64")),
  239. guest_arch => (guest_arch, None, None),
  240. };
  241. let target = format!("{guest_arch}-unknown-linux-musl");
  242. // Build our init program. The contract is that it will run anything it finds in /bin.
  243. let init = build(Some(&target), |cmd| {
  244. cmd.args(["--package", "init", "--profile", "release"])
  245. })
  246. .context("building init program failed")?;
  247. let init = match &*init {
  248. [(name, init)] => {
  249. if name != "init" {
  250. bail!("expected init program to be named init, found {name}")
  251. }
  252. init
  253. }
  254. init => bail!("expected exactly one init program, found {init:?}"),
  255. };
  256. let binaries = binaries(Some(&target))?;
  257. let tmp_dir = tempfile::tempdir().context("tempdir failed")?;
  258. let initrd_image = tmp_dir.path().join("qemu-initramfs.img");
  259. let initrd_image_file = OpenOptions::new()
  260. .create_new(true)
  261. .write(true)
  262. .open(&initrd_image)
  263. .with_context(|| {
  264. format!("failed to create {} for writing", initrd_image.display())
  265. })?;
  266. let mut gen_init_cpio = Command::new(&gen_init_cpio);
  267. let mut gen_init_cpio_child = gen_init_cpio
  268. .arg("-")
  269. .stdin(Stdio::piped())
  270. .stdout(initrd_image_file)
  271. .spawn()
  272. .with_context(|| format!("failed to spawn {gen_init_cpio:?}"))?;
  273. let Child { stdin, .. } = &mut gen_init_cpio_child;
  274. let mut stdin = stdin.take().unwrap();
  275. use std::os::unix::ffi::OsStrExt as _;
  276. // Send input into gen_init_cpio which looks something like
  277. //
  278. // file /init path-to-init 0755 0 0
  279. // dir /bin 0755 0 0
  280. // file /bin/foo path-to-foo 0755 0 0
  281. // file /bin/bar path-to-bar 0755 0 0
  282. for bytes in [
  283. "file /init ".as_bytes(),
  284. init.as_os_str().as_bytes(),
  285. " 0755 0 0\n".as_bytes(),
  286. "dir /bin 0755 0 0\n".as_bytes(),
  287. ] {
  288. stdin.write_all(bytes).expect("write");
  289. }
  290. for (profile, binaries) in binaries {
  291. for (name, binary) in binaries {
  292. let name = format!("{}-{}", profile, name);
  293. let path = tmp_dir.path().join(&name);
  294. copy(&binary, &path).with_context(|| {
  295. format!("copy({}, {}) failed", binary.display(), path.display())
  296. })?;
  297. for bytes in [
  298. "file /bin/".as_bytes(),
  299. name.as_bytes(),
  300. " ".as_bytes(),
  301. path.as_os_str().as_bytes(),
  302. " 0755 0 0\n".as_bytes(),
  303. ] {
  304. stdin.write_all(bytes).expect("write");
  305. }
  306. }
  307. }
  308. // Must explicitly close to signal EOF.
  309. drop(stdin);
  310. let output = gen_init_cpio_child
  311. .wait_with_output()
  312. .with_context(|| format!("failed to wait for {gen_init_cpio:?}"))?;
  313. let Output { status, .. } = &output;
  314. if status.code() != Some(0) {
  315. bail!("{gen_init_cpio:?} failed: {output:?}")
  316. }
  317. let mut qemu = Command::new(format!("qemu-system-{guest_arch}"));
  318. if let Some(machine) = machine {
  319. qemu.args(["-machine", machine]);
  320. }
  321. if guest_arch == ARCH {
  322. match OS {
  323. "linux" => {
  324. const KVM: &str = "/dev/kvm";
  325. match OpenOptions::new().read(true).write(true).open(KVM) {
  326. Ok(_file) => {
  327. qemu.args(["-accel", "kvm"]);
  328. }
  329. Err(error) => match error.kind() {
  330. ErrorKind::NotFound | ErrorKind::PermissionDenied => {}
  331. _kind => {
  332. return Err(error)
  333. .with_context(|| format!("failed to open {KVM}"));
  334. }
  335. },
  336. }
  337. }
  338. "macos" => {
  339. qemu.args(["-accel", "hvf"]);
  340. }
  341. os => bail!("unsupported OS: {os}"),
  342. }
  343. } else if let Some(cpu) = cpu {
  344. qemu.args(["-cpu", cpu]);
  345. }
  346. let console = OsString::from("ttyS0");
  347. let mut kernel_args = std::iter::once(("console", &console))
  348. .chain(run_args.clone().map(|run_arg| ("init.arg", run_arg)))
  349. .enumerate()
  350. .fold(OsString::new(), |mut acc, (i, (k, v))| {
  351. if i != 0 {
  352. acc.push(" ");
  353. }
  354. acc.push(k);
  355. acc.push("=");
  356. acc.push(v);
  357. acc
  358. });
  359. // We sometimes see kernel panics containing:
  360. //
  361. // [ 0.064000] Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug and send a report. Then try booting with the 'noapic' option.
  362. //
  363. // Heed the advice and boot with noapic. We don't know why this happens.
  364. kernel_args.push(" noapic");
  365. qemu.args(["-no-reboot", "-nographic", "-m", "512M", "-smp", "2"])
  366. .arg("-append")
  367. .arg(kernel_args)
  368. .arg("-kernel")
  369. .arg(&kernel_image)
  370. .arg("-initrd")
  371. .arg(&initrd_image);
  372. if guest_arch == "aarch64" {
  373. match OS {
  374. "linux" => {
  375. let mut cmd = Command::new("locate");
  376. let output = cmd
  377. .arg("QEMU_EFI.fd")
  378. .output()
  379. .with_context(|| format!("failed to run {cmd:?}"))?;
  380. let Output { status, .. } = &output;
  381. if status.code() != Some(0) {
  382. bail!("{qemu:?} failed: {output:?}")
  383. }
  384. let Output { stdout, .. } = output;
  385. let bios = String::from_utf8(stdout)
  386. .with_context(|| format!("failed to parse output of {cmd:?}"))?;
  387. qemu.args(["-bios", bios.trim()]);
  388. }
  389. "macos" => {
  390. let mut cmd = Command::new("brew");
  391. let output = cmd
  392. .args(["list", "qemu", "-1", "-v"])
  393. .output()
  394. .with_context(|| format!("failed to run {cmd:?}"))?;
  395. let Output { status, .. } = &output;
  396. if status.code() != Some(0) {
  397. bail!("{qemu:?} failed: {output:?}")
  398. }
  399. let Output { stdout, .. } = output;
  400. let output = String::from_utf8(stdout)
  401. .with_context(|| format!("failed to parse output of {cmd:?}"))?;
  402. const NAME: &str = "edk2-aarch64-code.fd";
  403. let bios = output.lines().find(|line| line.contains(NAME)).ok_or_else(
  404. || anyhow!("failed to find {NAME} in output of {cmd:?}: {output}"),
  405. )?;
  406. qemu.args(["-bios", bios.trim()]);
  407. }
  408. os => bail!("unsupported OS: {os}"),
  409. };
  410. }
  411. let mut qemu_child = qemu
  412. .stdin(Stdio::piped())
  413. .stdout(Stdio::piped())
  414. .stderr(Stdio::piped())
  415. .spawn()
  416. .with_context(|| format!("failed to spawn {qemu:?}"))?;
  417. let Child {
  418. stdin,
  419. stdout,
  420. stderr,
  421. ..
  422. } = &mut qemu_child;
  423. let stdin = stdin.take().unwrap();
  424. let stdin = Arc::new(Mutex::new(stdin));
  425. let stdout = stdout.take().unwrap();
  426. let stdout = BufReader::new(stdout);
  427. let stderr = stderr.take().unwrap();
  428. let stderr = BufReader::new(stderr);
  429. const TERMINATE_AFTER_COUNT: &[(&str, usize)] = &[
  430. ("end Kernel panic", 0),
  431. ("rcu: RCU grace-period kthread stack dump:", 0),
  432. ("watchdog: BUG: soft lockup", 1),
  433. ];
  434. let mut counts = [0; TERMINATE_AFTER_COUNT.len()];
  435. let mut terminate_if_kernel_hang =
  436. move |line: &str, stdin: &Arc<Mutex<ChildStdin>>| -> anyhow::Result<()> {
  437. if let Some(i) = TERMINATE_AFTER_COUNT
  438. .iter()
  439. .position(|(marker, _)| line.contains(marker))
  440. {
  441. counts[i] += 1;
  442. let (marker, max) = TERMINATE_AFTER_COUNT[i];
  443. if counts[i] > max {
  444. println!("{marker} detected > {max} times; terminating QEMU");
  445. let mut stdin = stdin.lock().unwrap();
  446. stdin
  447. .write_all(&[0x01, b'x'])
  448. .context("failed to write to stdin")?;
  449. println!("waiting for QEMU to terminate");
  450. }
  451. }
  452. Ok(())
  453. };
  454. let stderr = {
  455. let stdin = stdin.clone();
  456. thread::Builder::new()
  457. .spawn(move || {
  458. for line in stderr.lines() {
  459. let line = line.context("failed to read line from stderr")?;
  460. eprintln!("{}", line);
  461. terminate_if_kernel_hang(&line, &stdin)?;
  462. }
  463. anyhow::Ok(())
  464. })
  465. .unwrap()
  466. };
  467. let mut outcome = None;
  468. for line in stdout.lines() {
  469. let line = line.context("failed to read line from stdout")?;
  470. println!("{}", line);
  471. terminate_if_kernel_hang(&line, &stdin)?;
  472. // The init program will print "init: success" or "init: failure" to indicate
  473. // the outcome of running the binaries it found in /bin.
  474. if let Some(line) = line.strip_prefix("init: ") {
  475. let previous = match line {
  476. "success" => outcome.replace(Ok(())),
  477. "failure" => outcome.replace(Err(())),
  478. line => bail!("unexpected init output: {}", line),
  479. };
  480. if let Some(previous) = previous {
  481. bail!("multiple exit status: previous={previous:?}, current={line}");
  482. }
  483. }
  484. }
  485. let output = qemu_child
  486. .wait_with_output()
  487. .with_context(|| format!("failed to wait for {qemu:?}"))?;
  488. let Output { status, .. } = &output;
  489. if status.code() != Some(0) {
  490. bail!("{qemu:?} failed: {output:?}")
  491. }
  492. stderr.join().unwrap()?;
  493. let outcome = outcome.ok_or(anyhow!("init did not exit"))?;
  494. match outcome {
  495. Ok(()) => {}
  496. Err(()) => {
  497. errors.push(anyhow!("VM binaries failed on {}", kernel_image.display()))
  498. }
  499. }
  500. }
  501. if errors.is_empty() {
  502. Ok(())
  503. } else {
  504. Err(Errors::new(errors).into())
  505. }
  506. }
  507. }
  508. }