4
0

run.rs 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528
  1. use std::{
  2. ffi::OsString,
  3. fmt::Write as _,
  4. fs::{copy, create_dir_all, OpenOptions},
  5. io::{BufRead as _, BufReader, Write as _},
  6. path::PathBuf,
  7. process::{Child, ChildStdin, Command, Output, Stdio},
  8. sync::{Arc, Mutex},
  9. thread,
  10. };
  11. use anyhow::{anyhow, bail, Context as _, Result};
  12. use base64::engine::Engine as _;
  13. use cargo_metadata::{Artifact, CompilerMessage, Message, Target};
  14. use clap::Parser;
  15. use xtask::{Errors, AYA_BUILD_INTEGRATION_BPF};
  // Selects where the integration tests are executed. It is used as the clap
  // subcommand of `Options` and matched on by `run`.
  //
  // NOTE: clap derives user-facing help text from `///` doc comments, so the
  // existing `///` text is part of the CLI output. Maintainer notes in this
  // block are therefore written as plain `//` comments.
  #[derive(Parser)]
  enum Environment {
      /// Runs the integration tests locally.
      Local {
          /// The command used to wrap your application.
          // `run` splits this string into a program (first word) followed by
          // its leading arguments; the test binary path is appended after them.
          #[clap(short, long, default_value = "sudo -E")]
          runner: String,
      },
      /// Runs the integration tests in a VM.
      VM {
          /// The cache directory in which to store intermediate artifacts.
          // `run` creates this directory if needed and keeps the compiled
          // `gen_init_cpio` binary in it.
          #[clap(long)]
          cache_dir: PathBuf,
          /// The Github API token to use if network requests to Github are made.
          ///
          /// This may be required if Github rate limits are exceeded.
          // Only consulted when `gen_init_cpio` is not already in the cache and
          // its source has to be downloaded.
          #[clap(long)]
          github_api_token: Option<String>,
          /// The kernel images to use.
          ///
          /// You can download some images with:
          ///
          /// wget --accept-regex '.*/linux-image-[0-9\.-]+-cloud-.*-unsigned*' \
          /// --recursive ftp://ftp.us.debian.org/debian/pool/main/l/linux/
          ///
          /// You can then extract them with:
          ///
          /// find . -name '*.deb' -print0 \
          /// | xargs -0 -I {} sh -c "dpkg --fsys-tarfile {} \
          /// | tar --wildcards --extract '*vmlinuz*' --file -"
          // At least one image is required; `run` boots one VM per image.
          #[clap(required = true)]
          kernel_image: Vec<PathBuf>,
      },
  }
  // Command-line options for the integration test runner; consumed by `run`.
  //
  // NOTE: `///` doc comments on clap-derived items become help text, so
  // maintainer notes in this block use plain `//` comments.
  #[derive(Parser)]
  pub struct Options {
      // The environment (local or VM) in which the tests are executed.
      #[clap(subcommand)]
      environment: Environment,
      /// Arguments to pass to your application.
      // `run` passes these to every test binary after its own default
      // `--test-threads=1` argument.
      #[clap(global = true, last = true)]
      run_args: Vec<OsString>,
  }
  58. pub fn build<F>(target: Option<&str>, f: F) -> Result<Vec<(String, PathBuf)>>
  59. where
  60. F: FnOnce(&mut Command) -> &mut Command,
  61. {
  62. // Always use rust-lld in case we're cross-compiling.
  63. let mut cmd = Command::new("cargo");
  64. cmd.args(["build", "--message-format=json"]);
  65. if let Some(target) = target {
  66. let config = format!("target.{target}.linker = \"rust-lld\"");
  67. cmd.args(["--target", target, "--config", &config]);
  68. }
  69. f(&mut cmd);
  70. let mut child = cmd
  71. .stdout(Stdio::piped())
  72. .spawn()
  73. .with_context(|| format!("failed to spawn {cmd:?}"))?;
  74. let Child { stdout, .. } = &mut child;
  75. let stdout = stdout.take().unwrap();
  76. let stdout = BufReader::new(stdout);
  77. let mut executables = Vec::new();
  78. for message in Message::parse_stream(stdout) {
  79. #[allow(clippy::collapsible_match)]
  80. match message.context("valid JSON")? {
  81. Message::CompilerArtifact(Artifact {
  82. executable,
  83. target: Target { name, .. },
  84. ..
  85. }) => {
  86. if let Some(executable) = executable {
  87. executables.push((name, executable.into()));
  88. }
  89. }
  90. Message::CompilerMessage(CompilerMessage { message, .. }) => {
  91. for line in message.rendered.unwrap_or_default().split('\n') {
  92. println!("cargo:warning={line}");
  93. }
  94. }
  95. Message::TextLine(line) => {
  96. println!("{line}");
  97. }
  98. _ => {}
  99. }
  100. }
  101. let status = child
  102. .wait()
  103. .with_context(|| format!("failed to wait for {cmd:?}"))?;
  104. if status.code() != Some(0) {
  105. bail!("{cmd:?} failed: {status:?}")
  106. }
  107. Ok(executables)
  108. }
  109. /// Build and run the project.
  110. pub fn run(opts: Options) -> Result<()> {
  111. let Options {
  112. environment,
  113. run_args,
  114. } = opts;
  115. type Binary = (String, PathBuf);
  116. fn binaries(target: Option<&str>) -> Result<Vec<(&str, Vec<Binary>)>> {
  117. ["dev", "release"]
  118. .into_iter()
  119. .map(|profile| {
  120. let binaries = build(target, |cmd| {
  121. cmd.env(AYA_BUILD_INTEGRATION_BPF, "true").args([
  122. "--package",
  123. "integration-test",
  124. "--tests",
  125. "--profile",
  126. profile,
  127. ])
  128. })?;
  129. anyhow::Ok((profile, binaries))
  130. })
  131. .collect()
  132. }
  133. // Use --test-threads=1 to prevent tests from interacting with shared
  134. // kernel state due to the lack of inter-test isolation.
  135. let default_args = [OsString::from("--test-threads=1")];
  136. let run_args = default_args.iter().chain(run_args.iter());
  137. match environment {
  138. Environment::Local { runner } => {
  139. let mut args = runner.trim().split_terminator(' ');
  140. let runner = args.next().ok_or(anyhow!("no first argument"))?;
  141. let args = args.collect::<Vec<_>>();
  142. let binaries = binaries(None)?;
  143. let mut failures = String::new();
  144. for (profile, binaries) in binaries {
  145. for (name, binary) in binaries {
  146. let mut cmd = Command::new(runner);
  147. let cmd = cmd.args(args.iter()).arg(binary).args(run_args.clone());
  148. println!("{profile}:{name} running {cmd:?}");
  149. let status = cmd
  150. .status()
  151. .with_context(|| format!("failed to run {cmd:?}"))?;
  152. if status.code() != Some(0) {
  153. writeln!(&mut failures, "{profile}:{name} failed: {status:?}")
  154. .context("String write failed")?
  155. }
  156. }
  157. }
  158. if failures.is_empty() {
  159. Ok(())
  160. } else {
  161. Err(anyhow!("failures:\n{}", failures))
  162. }
  163. }
  164. Environment::VM {
  165. cache_dir,
  166. github_api_token,
  167. kernel_image,
  168. } => {
  169. // The user has asked us to run the tests on a VM. This is involved; strap in.
  170. //
  171. // We need tools to build the initramfs; we use gen_init_cpio from the Linux repository,
  172. // taking care to cache it.
  173. //
  174. // Then we iterate the kernel images, using the `file` program to guess the target
  175. // architecture. We then build the init program and our test binaries for that
  176. // architecture, and use gen_init_cpio to build an initramfs containing the test
  177. // binaries. We're almost ready to run the VM.
  178. //
  179. // We consult our OS, our architecture, and the target architecture to determine if
  180. // hardware acceleration is available, and then start QEMU with the provided kernel
  181. // image and the initramfs we built.
  182. //
  183. // We consume the output of QEMU, looking for the output of our init program. This is
  184. // the only way to distinguish success from failure. We batch up the errors across all
  185. // VM images and report to the user. The end.
  186. create_dir_all(&cache_dir).context("failed to create cache dir")?;
  187. let gen_init_cpio = cache_dir.join("gen_init_cpio");
  188. if !gen_init_cpio
  189. .try_exists()
  190. .context("failed to check existence of gen_init_cpio")?
  191. {
  192. // TODO(https://github.com/oxidecomputer/third-party-api-clients/issues/96): Use ETag-based caching.
  193. let client = octorust::Client::new(
  194. String::from("aya-xtask-integration-test-run"),
  195. github_api_token.map(octorust::auth::Credentials::Token),
  196. )?;
  197. let octorust::Response {
  198. status: _,
  199. headers: _,
  200. body: octorust::types::ContentFile { mut content, .. },
  201. } = tokio::runtime::Builder::new_current_thread()
  202. .enable_all()
  203. .build()
  204. .unwrap()
  205. .block_on(client.repos().get_content_file(
  206. "torvalds",
  207. "linux",
  208. "usr/gen_init_cpio.c",
  209. "master",
  210. ))
  211. .context("failed to download gen_init_cpio.c")?;
  212. // Github very helpfully wraps their base64 at 10 columns /s.
  213. content.retain(|c| !c.is_whitespace());
  214. let content = base64::engine::general_purpose::STANDARD
  215. .decode(content)
  216. .context("failed to decode gen_init_cpio.c")?;
  217. let mut clang = Command::new("clang");
  218. clang
  219. .args(["-g", "-O2", "-x", "c", "-", "-o"])
  220. .arg(&gen_init_cpio);
  221. let mut child = clang
  222. .stdin(Stdio::piped())
  223. .stdout(Stdio::piped())
  224. .stderr(Stdio::piped())
  225. .spawn()
  226. .with_context(|| format!("failed to spawn {clang:?}"))?;
  227. let Child { stdin, .. } = &mut child;
  228. let mut stdin = stdin.take().unwrap();
  229. stdin
  230. .write_all(&content)
  231. .with_context(|| format!("failed to write to {clang:?} stdin"))?;
  232. std::mem::drop(stdin); // Send EOF.
  233. let output = child
  234. .wait_with_output()
  235. .with_context(|| format!("failed to wait for {clang:?}"))?;
  236. let Output { status, .. } = &output;
  237. if status.code() != Some(0) {
  238. bail!("{clang:?} failed: {output:?}")
  239. }
  240. }
  241. let mut errors = Vec::new();
  242. for kernel_image in kernel_image {
  243. // Guess the guest architecture.
  244. let mut cmd = Command::new("file");
  245. let output = cmd
  246. .arg("--brief")
  247. .arg(&kernel_image)
  248. .output()
  249. .with_context(|| format!("failed to run {cmd:?}"))?;
  250. let Output { status, .. } = &output;
  251. if status.code() != Some(0) {
  252. bail!("{cmd:?} failed: {output:?}")
  253. }
  254. let Output { stdout, .. } = output;
  255. // Now parse the output of the file command, which looks something like
  256. //
  257. // - Linux kernel ARM64 boot executable Image, little-endian, 4K pages
  258. //
  259. // - Linux kernel x86 boot executable bzImage, version 6.1.0-10-cloud-amd64 [..]
  260. let stdout = String::from_utf8(stdout)
  261. .with_context(|| format!("invalid UTF-8 in {cmd:?} stdout"))?;
  262. let (_, stdout) = stdout
  263. .split_once("Linux kernel")
  264. .ok_or_else(|| anyhow!("failed to parse {cmd:?} stdout: {stdout}"))?;
  265. let (guest_arch, _) = stdout
  266. .split_once("boot executable")
  267. .ok_or_else(|| anyhow!("failed to parse {cmd:?} stdout: {stdout}"))?;
  268. let guest_arch = guest_arch.trim();
  269. let (guest_arch, machine, cpu, console) = match guest_arch {
  270. "ARM64" => ("aarch64", Some("virt"), Some("max"), "ttyAMA0"),
  271. "x86" => ("x86_64", None, None, "ttyS0"),
  272. guest_arch => (guest_arch, None, None, "ttyS0"),
  273. };
  274. let target = format!("{guest_arch}-unknown-linux-musl");
  275. // Build our init program. The contract is that it will run anything it finds in /bin.
  276. let init = build(Some(&target), |cmd| {
  277. cmd.args(["--package", "init", "--profile", "release"])
  278. })
  279. .context("building init program failed")?;
  280. let init = match &*init {
  281. [(name, init)] => {
  282. if name != "init" {
  283. bail!("expected init program to be named init, found {name}")
  284. }
  285. init
  286. }
  287. init => bail!("expected exactly one init program, found {init:?}"),
  288. };
  289. let binaries = binaries(Some(&target))?;
  290. let tmp_dir = tempfile::tempdir().context("tempdir failed")?;
  291. let initrd_image = tmp_dir.path().join("qemu-initramfs.img");
  292. let initrd_image_file = OpenOptions::new()
  293. .create_new(true)
  294. .write(true)
  295. .open(&initrd_image)
  296. .with_context(|| {
  297. format!("failed to create {} for writing", initrd_image.display())
  298. })?;
  299. let mut gen_init_cpio = Command::new(&gen_init_cpio);
  300. let mut gen_init_cpio_child = gen_init_cpio
  301. .arg("-")
  302. .stdin(Stdio::piped())
  303. .stdout(initrd_image_file)
  304. .spawn()
  305. .with_context(|| format!("failed to spawn {gen_init_cpio:?}"))?;
  306. let Child { stdin, .. } = &mut gen_init_cpio_child;
  307. let mut stdin = stdin.take().unwrap();
  308. use std::os::unix::ffi::OsStrExt as _;
  309. // Send input into gen_init_cpio which looks something like
  310. //
  311. // file /init path-to-init 0755 0 0
  312. // dir /bin 0755 0 0
  313. // file /bin/foo path-to-foo 0755 0 0
  314. // file /bin/bar path-to-bar 0755 0 0
  315. for bytes in [
  316. "file /init ".as_bytes(),
  317. init.as_os_str().as_bytes(),
  318. " 0755 0 0\n".as_bytes(),
  319. "dir /bin 0755 0 0\n".as_bytes(),
  320. ] {
  321. stdin.write_all(bytes).expect("write");
  322. }
  323. for (profile, binaries) in binaries {
  324. for (name, binary) in binaries {
  325. let name = format!("{}-{}", profile, name);
  326. let path = tmp_dir.path().join(&name);
  327. copy(&binary, &path).with_context(|| {
  328. format!("copy({}, {}) failed", binary.display(), path.display())
  329. })?;
  330. for bytes in [
  331. "file /bin/".as_bytes(),
  332. name.as_bytes(),
  333. " ".as_bytes(),
  334. path.as_os_str().as_bytes(),
  335. " 0755 0 0\n".as_bytes(),
  336. ] {
  337. stdin.write_all(bytes).expect("write");
  338. }
  339. }
  340. }
  341. // Must explicitly close to signal EOF.
  342. drop(stdin);
  343. let output = gen_init_cpio_child
  344. .wait_with_output()
  345. .with_context(|| format!("failed to wait for {gen_init_cpio:?}"))?;
  346. let Output { status, .. } = &output;
  347. if status.code() != Some(0) {
  348. bail!("{gen_init_cpio:?} failed: {output:?}")
  349. }
  350. let mut qemu = Command::new(format!("qemu-system-{guest_arch}"));
  351. if let Some(machine) = machine {
  352. qemu.args(["-machine", machine]);
  353. }
  354. if let Some(cpu) = cpu {
  355. qemu.args(["-cpu", cpu]);
  356. }
  357. for accel in ["kvm", "hvf", "tcg"] {
  358. qemu.args(["-accel", accel]);
  359. }
  360. let console = OsString::from(console);
  361. let mut kernel_args = std::iter::once(("console", &console))
  362. .chain(run_args.clone().map(|run_arg| ("init.arg", run_arg)))
  363. .enumerate()
  364. .fold(OsString::new(), |mut acc, (i, (k, v))| {
  365. if i != 0 {
  366. acc.push(" ");
  367. }
  368. acc.push(k);
  369. acc.push("=");
  370. acc.push(v);
  371. acc
  372. });
  373. // We sometimes see kernel panics containing:
  374. //
  375. // [ 0.064000] Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug and send a report. Then try booting with the 'noapic' option.
  376. //
  377. // Heed the advice and boot with noapic. We don't know why this happens.
  378. kernel_args.push(" noapic");
  379. qemu.args(["-no-reboot", "-nographic", "-m", "512M", "-smp", "2"])
  380. .arg("-append")
  381. .arg(kernel_args)
  382. .arg("-kernel")
  383. .arg(&kernel_image)
  384. .arg("-initrd")
  385. .arg(&initrd_image);
  386. let mut qemu_child = qemu
  387. .stdin(Stdio::piped())
  388. .stdout(Stdio::piped())
  389. .stderr(Stdio::piped())
  390. .spawn()
  391. .with_context(|| format!("failed to spawn {qemu:?}"))?;
  392. let Child {
  393. stdin,
  394. stdout,
  395. stderr,
  396. ..
  397. } = &mut qemu_child;
  398. let stdin = stdin.take().unwrap();
  399. let stdin = Arc::new(Mutex::new(stdin));
  400. let stdout = stdout.take().unwrap();
  401. let stdout = BufReader::new(stdout);
  402. let stderr = stderr.take().unwrap();
  403. let stderr = BufReader::new(stderr);
  404. const TERMINATE_AFTER_COUNT: &[(&str, usize)] = &[
  405. ("end Kernel panic", 0),
  406. ("rcu: RCU grace-period kthread stack dump:", 0),
  407. ("watchdog: BUG: soft lockup", 1),
  408. ];
  409. let mut counts = [0; TERMINATE_AFTER_COUNT.len()];
  410. let mut terminate_if_kernel_hang =
  411. move |line: &str, stdin: &Arc<Mutex<ChildStdin>>| -> anyhow::Result<()> {
  412. if let Some(i) = TERMINATE_AFTER_COUNT
  413. .iter()
  414. .position(|(marker, _)| line.contains(marker))
  415. {
  416. counts[i] += 1;
  417. let (marker, max) = TERMINATE_AFTER_COUNT[i];
  418. if counts[i] > max {
  419. println!("{marker} detected > {max} times; terminating QEMU");
  420. let mut stdin = stdin.lock().unwrap();
  421. stdin
  422. .write_all(&[0x01, b'x'])
  423. .context("failed to write to stdin")?;
  424. println!("waiting for QEMU to terminate");
  425. }
  426. }
  427. Ok(())
  428. };
  429. let stderr = {
  430. let stdin = stdin.clone();
  431. thread::Builder::new()
  432. .spawn(move || {
  433. for line in stderr.lines() {
  434. let line = line.context("failed to read line from stderr")?;
  435. eprintln!("{}", line);
  436. terminate_if_kernel_hang(&line, &stdin)?;
  437. }
  438. anyhow::Ok(())
  439. })
  440. .unwrap()
  441. };
  442. let mut outcome = None;
  443. for line in stdout.lines() {
  444. let line = line.context("failed to read line from stdout")?;
  445. println!("{}", line);
  446. terminate_if_kernel_hang(&line, &stdin)?;
  447. // The init program will print "init: success" or "init: failure" to indicate
  448. // the outcome of running the binaries it found in /bin.
  449. if let Some(line) = line.strip_prefix("init: ") {
  450. let previous = match line {
  451. "success" => outcome.replace(Ok(())),
  452. "failure" => outcome.replace(Err(())),
  453. line => bail!("unexpected init output: {}", line),
  454. };
  455. if let Some(previous) = previous {
  456. bail!("multiple exit status: previous={previous:?}, current={line}");
  457. }
  458. }
  459. }
  460. let output = qemu_child
  461. .wait_with_output()
  462. .with_context(|| format!("failed to wait for {qemu:?}"))?;
  463. let Output { status, .. } = &output;
  464. if status.code() != Some(0) {
  465. bail!("{qemu:?} failed: {output:?}")
  466. }
  467. stderr.join().unwrap()?;
  468. let outcome = outcome.ok_or(anyhow!("init did not exit"))?;
  469. match outcome {
  470. Ok(()) => {}
  471. Err(()) => {
  472. errors.push(anyhow!("VM binaries failed on {}", kernel_image.display()))
  473. }
  474. }
  475. }
  476. if errors.is_empty() {
  477. Ok(())
  478. } else {
  479. Err(Errors::new(errors).into())
  480. }
  481. }
  482. }
  483. }