Browse Source

feat: 添加内核性能分析工具 (#91)

* feat: 允许用户通过dadk来观测内核的性能,生成火焰图

* feat(profile): 导入文件并解析

* doc: 添加使用dadk进行内核性能分析的文档

---------

Signed-off-by: longjin <longjin@DragonOS.org>
LoGin 4 months ago
parent
commit
9f4c276062

+ 8 - 0
dadk/Cargo.toml

@@ -30,12 +30,20 @@ doc = true
 [dependencies]
 anyhow = { version = "1.0.90", features = ["std", "backtrace"] }
 clap = { version = "4.5.20", features = ["derive"] }
+crossbeam = "0.8.4"
 dadk-config = { path = "../dadk-config" }
 dadk-user = { path = "../dadk-user" }
 derive_builder = "0.20.0"
 env_logger = "0.11.5"
+humantime = "2.1.0"
+indicatif = "0.17.9"
+inferno = "0.12.0"
+lazy_static = "1.4.0"
 log = "0.4.22"
+rayon = "1.10.0"
 regex = "1.9.1"
+serde = { version = "1.0.160", features = ["serde_derive"] }
+serde_json = "1.0.96"
 
 [dev-dependencies]
 tempfile = "3.13.0"

+ 4 - 0
dadk/src/actions/mod.rs

@@ -1,5 +1,6 @@
 use crate::context::DADKExecContext;
 
+pub mod profile;
 pub mod rootfs;
 pub mod user;
 
@@ -14,5 +15,8 @@ pub fn run(ctx: DADKExecContext) {
         crate::console::Action::User(user_command) => {
             user::run(&ctx, user_command).expect("Run user action error.")
         }
+        crate::console::Action::Profile(profile_command) => {
+            profile::run(&ctx, profile_command).expect("Run profile action error.")
+        }
     }
 }

+ 570 - 0
dadk/src/actions/profile/mod.rs

@@ -0,0 +1,570 @@
+use std::{
+    collections::{BTreeMap, HashMap},
+    io::{Read, Write},
+    path::PathBuf,
+    process::Command,
+    sync::{
+        atomic::{AtomicBool, Ordering},
+        Arc, Mutex, Weak,
+    },
+};
+
+use crate::{
+    console::profile::{ProfileCommand, ProfileFileType, ProfileParseArgs, ProfileSampleArgs},
+    context::DADKExecContext,
+};
+
+use anyhow::{anyhow, Result};
+use indicatif::{ProgressBar, ProgressStyle};
+use lazy_static::lazy_static;
+use rayon::ThreadPoolBuilder;
+use serde::{Deserialize, Serialize};
+
+lazy_static! {
+    // Matches the `0x<hex digits> in` address prefix that precedes the symbol
+    // name in a backtrace frame line (e.g. `0xffff800001080320 in`).
+    static ref GUEST_ADDRESS_HEX_PATTERN: regex::Regex =
+        regex::Regex::new(r"0x[0-9a-fA-F]+ in").unwrap();
+    // Matches a `::{...}` path segment such as `::{impl#37}`. The match is
+    // non-greedy, so each braced segment is matched on its own.
+    static ref RUST_IMPL_PATTERN: regex::Regex = regex::Regex::new(r"::\{.*?\}").unwrap();
+}
+
+/// Dispatches a `profile` subcommand to the function that implements it.
+pub(super) fn run(ctx: &DADKExecContext, cmd: &ProfileCommand) -> Result<()> {
+    match cmd {
+        ProfileCommand::Parse(args) => parse_input_data(ctx, args),
+        ProfileCommand::Sample(args) => sample(ctx, args),
+    }
+}
+
+/// Runs one sampling session with `args` and, if it succeeds, exports the
+/// collected samples.
+fn sample(_ctx: &DADKExecContext, args: &ProfileSampleArgs) -> Result<()> {
+    let session = Profiler::new(args.clone());
+    session.run().and_then(|()| session.save())
+}
+
+/// Loads previously saved sample data from `args.input` and re-exports it to
+/// `args.output` in `args.format`, keeping only the CPUs selected by
+/// `args.cpu_mask`.
+///
+/// # Errors
+///
+/// Returns an error when the input file cannot be read or parsed.
+fn parse_input_data(_ctx: &DADKExecContext, args: &ProfileParseArgs) -> Result<()> {
+    // This function already returns `Result`, so report a load failure to the
+    // caller instead of panicking here.
+    let sample_buf = SampleBuffer::from_saved_file(&args.input).map_err(|e| {
+        anyhow!(
+            "Failed to load sample buffer from {}: {}",
+            args.input.display(),
+            e
+        )
+    })?;
+    sample_buf.export_data(args.format, &args.output, args.cpu_mask);
+    log::info!("Profile data saved to {}", args.output.display());
+    Ok(())
+}
+
+/// The sample data captured at one point in time.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+struct Sample {
+    /// The sample data
+    /// The key is the cpu id
+    /// The value is the sample data: one function name per stack frame, in
+    /// the order the frame lines were parsed
+    data: BTreeMap<usize, Vec<String>>,
+    /// Identifier given to `Sample::new`.
+    id: usize,
+    /// Timestamp given to `Sample::new`.
+    timestamp: usize,
+    /// CPU that the frame lines parsed next are attributed to. Parser state
+    /// only; it is skipped by serde.
+    #[serde(skip)]
+    current_cpu: Option<usize>,
+}
+
+impl Sample {
+    /// Creates an empty sample with the given identifier and timestamp.
+    fn new(id: usize, timestamp: usize) -> Self {
+        Self {
+            data: BTreeMap::new(),
+            id,
+            timestamp,
+            current_cpu: None,
+        }
+    }
+
+    /// Feeds one line of backtrace output into the sample.
+    ///
+    /// Lines starting with `#` are treated as stack frames; every other line
+    /// is checked for a `Thread ...` header that selects the current CPU.
+    fn push_new_line(&mut self, line: &str) {
+        if line.starts_with('#') {
+            self.parse_frame_line(line);
+        } else {
+            self.parse_thread_line(line);
+        }
+    }
+
+    /// Extracts the function name from a frame line and appends it to the
+    /// stack of the current CPU.
+    ///
+    /// A frame line that arrives while no CPU is selected is ignored, because
+    /// there is no stack it could be attributed to.
+    fn parse_frame_line(&mut self, line: &str) {
+        let line = line.trim();
+        // todo: make the nesting level of removed `<>` adjustable, so that
+        // more detailed information can be printed
+        let line = remove_angle_bracket_content(line);
+        let line = remove_guest_address(&line);
+        let line = remove_rust_impl_pattern(&line)
+            .replace("(...)", "")
+            .replace("()", "");
+
+        // The first field is the frame number (`#N`), the second one is the
+        // function name.
+        let fn_name = match line.split_whitespace().nth(1) {
+            Some(name) => name,
+            None => return,
+        };
+        let cpu = match self.current_cpu {
+            Some(cpu) => cpu,
+            None => return,
+        };
+        self.data.entry(cpu).or_default().push(fn_name.to_string());
+    }
+
+    /// Parses a `Thread ... (CPU#<n> ...)` header and makes `<n>` the current
+    /// CPU. Lines that do not start with `Thread` are ignored.
+    ///
+    /// If the header carries no parsable CPU id, the current CPU is cleared so
+    /// that the frames which follow are not attributed to the previous
+    /// thread's CPU.
+    fn parse_thread_line(&mut self, line: &str) {
+        if !line.starts_with("Thread") {
+            return;
+        }
+
+        let cpu = line
+            .find("CPU#")
+            .and_then(|idx| line[idx + 4..].split_whitespace().next())
+            .and_then(|field| field.parse::<usize>().ok());
+
+        match cpu {
+            Some(cpu) => {
+                self.current_cpu = Some(cpu);
+                match self.data.entry(cpu) {
+                    std::collections::btree_map::Entry::Vacant(slot) => {
+                        slot.insert(Vec::new());
+                    }
+                    std::collections::btree_map::Entry::Occupied(_) => {
+                        log::error!("current cpu {} is already set in hashmap", cpu);
+                    }
+                }
+            }
+            None => {
+                self.current_cpu = None;
+                log::warn!("no cpu id found in thread header: {}", line);
+            }
+        }
+    }
+
+    /// Number of CPUs that have a stack in this sample.
+    // Only used by the unit tests, hence the lint exemption.
+    #[allow(dead_code)]
+    fn vcpu_count(&self) -> usize {
+        self.data.len()
+    }
+}
+
+/// An ordered collection of samples. This is the type that is serialized to,
+/// and deserialized from, the JSON output format.
+#[derive(Debug, Serialize, Deserialize)]
+struct SampleBuffer {
+    /// The samples, in the order they were pushed.
+    samples: Vec<Sample>,
+}
+
+impl SampleBuffer {
+    /// Creates an empty buffer.
+    fn new() -> Self {
+        Self {
+            samples: Vec::new(),
+        }
+    }
+
+    /// Appends one sample.
+    fn push(&mut self, sample: Sample) {
+        self.samples.push(sample);
+    }
+
+    /// Returns whether `cpu` is selected by `cpumask`, where bit `n` of the
+    /// mask selects CPU `n`. CPU ids of 128 and above do not fit into the mask
+    /// and are never selected.
+    fn cpu_selected(cpumask: u128, cpu: usize) -> bool {
+        cpu < 128 && (cpumask & (1u128 << cpu)) != 0
+    }
+
+    /// Writes the samples of the CPUs selected by `cpumask` to `outpath` in
+    /// format `t`. `None` selects every CPU.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the file cannot be created or written.
+    fn export_data(&self, t: ProfileFileType, outpath: &PathBuf, cpumask: Option<u128>) {
+        let mut writer = std::fs::File::create(outpath).expect("failed to create output file");
+        match t {
+            ProfileFileType::Json => {
+                let filtered = self.filter_cpu(cpumask);
+                serde_json::to_writer(&mut writer, &filtered)
+                    .expect("failed to write json profile data");
+            }
+            ProfileFileType::Folded => {
+                let folded = self.fold(cpumask);
+                // `write` is allowed to write only a part of the buffer;
+                // `write_all` keeps going until everything is written.
+                writer
+                    .write_all(folded.to_string().as_bytes())
+                    .expect("failed to write folded profile data");
+            }
+            ProfileFileType::Flamegraph => {
+                let folded = self.fold(cpumask);
+                let lines: Vec<String> = folded
+                    .data
+                    .iter()
+                    .map(|(k, cnt)| format!("{} {}", k, cnt))
+                    .collect();
+
+                let mut opt = inferno::flamegraph::Options::default();
+                inferno::flamegraph::from_lines(&mut opt, lines.iter().map(|s| s.as_str()), writer)
+                    .expect("failed to render flamegraph");
+            }
+        }
+    }
+
+    /// Returns a copy of the buffer that keeps only the stacks of the CPUs
+    /// selected by `cpumask`. Every sample is kept, even if none of its CPUs
+    /// is selected.
+    fn filter_cpu(&self, cpumask: Option<u128>) -> SampleBuffer {
+        let cpumask = cpumask.unwrap_or(u128::MAX);
+        let mut result = SampleBuffer::new();
+        for s in &self.samples {
+            let mut sample = Sample::new(s.id, s.timestamp);
+            for (cpu, stack) in &s.data {
+                if Self::cpu_selected(cpumask, *cpu) {
+                    sample.data.insert(*cpu, stack.clone());
+                }
+            }
+            result.push(sample);
+        }
+
+        result
+    }
+
+    /// Counts how often each distinct stack occurs among the CPUs selected by
+    /// `cpumask`.
+    ///
+    /// The key of the result is the stack with its frames in reversed order,
+    /// joined by `;`.
+    fn fold(&self, cpumask: Option<u128>) -> FoldedSampleBuffer {
+        let mut folded_buffer = FoldedSampleBuffer::default();
+        let cpumask = cpumask.unwrap_or(u128::MAX);
+
+        for sample in &self.samples {
+            for (cpu, stack) in &sample.data {
+                if !Self::cpu_selected(cpumask, *cpu) {
+                    continue;
+                }
+                let folded_stack = stack
+                    .iter()
+                    .rev()
+                    .map(String::as_str)
+                    .collect::<Vec<_>>()
+                    .join(";");
+                *folded_buffer.data.entry(folded_stack).or_insert(0) += 1;
+            }
+        }
+
+        folded_buffer
+    }
+
+    /// Loads a buffer from a file written by `export_data`.
+    ///
+    /// The content is first parsed as JSON; if that fails it is parsed as
+    /// folded stacks.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file cannot be read, or if it is neither valid
+    /// JSON sample data nor valid folded-stack text.
+    fn from_saved_file(path: &PathBuf) -> Result<Self> {
+        let mut file = std::fs::File::open(path)?;
+        let mut buf = String::new();
+        file.read_to_string(&mut buf)?;
+        if let Ok(samples) = serde_json::from_str::<SampleBuffer>(&buf) {
+            return Ok(samples);
+        }
+
+        // check if it is a folded file
+        let folded = FoldedSampleBuffer::try_from(&buf)?;
+        Ok(folded.into())
+    }
+}
+
+/// Drives one sampling session and stores the samples it collects.
+struct Profiler {
+    /// The samples collected so far.
+    samples: Mutex<SampleBuffer>,
+    /// Weak reference to the `Arc` that owns this profiler; `run` upgrades it
+    /// to hand owning handles to the threads it spawns.
+    self_ref: Weak<Profiler>,
+
+    /// The command line arguments of the session.
+    args: ProfileSampleArgs,
+}
+
+impl Profiler {
+    /// Creates a profiler for `args`.
+    ///
+    /// The profiler is built with `Arc::new_cyclic` so that it can store a
+    /// weak reference to itself.
+    fn new(args: ProfileSampleArgs) -> Arc<Profiler> {
+        Arc::new_cyclic(|self_ref| Self {
+            samples: Mutex::new(SampleBuffer::new()),
+            args,
+            self_ref: self_ref.clone(),
+        })
+    }
+
+    /// Collects `duration / interval` samples, starting one sampling task per
+    /// interval on a pool of `workers` threads, and stores them in
+    /// `self.samples`.
+    ///
+    /// Collection stops early at the first sample that fails; the samples
+    /// received up to that point are kept and `Ok(())` is still returned.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the interval is shorter than one millisecond or if
+    /// the thread pool cannot be built.
+    fn run(&self) -> Result<()> {
+        let duration = self.args.duration();
+        let interval = self.args.interval();
+
+        // The number of samples is computed in whole milliseconds, so an
+        // interval below 1ms would be a division by zero.
+        let interval_ms = interval.as_millis();
+        if interval_ms == 0 {
+            return Err(anyhow!(
+                "sampling interval must be at least 1ms, got {:?}",
+                interval
+            ));
+        }
+        let maxid = (duration.as_millis() / interval_ms) as usize;
+
+        let thread_pool = ThreadPoolBuilder::default()
+            .num_threads(self.args.workers)
+            .build()
+            .map_err(|e| anyhow!("failed to build thread pool: {}", e))?;
+
+        // Create a channel for communication
+        let (sender, receiver) = crossbeam::channel::unbounded::<Option<Sample>>();
+        let mut id = 0;
+
+        // Receiver thread: stores incoming samples until `maxid` samples have
+        // arrived, a `None` (failed sample) is received, or the channel closes.
+        let rx_handle = {
+            let p = self.self_ref.upgrade().unwrap();
+
+            std::thread::spawn(move || {
+                let pb = ProgressBar::new(maxid as u64);
+                pb.set_style(
+                    ProgressStyle::default_bar()
+                        .template(
+                            "{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} ({eta})",
+                        )
+                        .unwrap()
+                        .progress_chars("#>-"),
+                );
+                let mut guard = p.samples.lock().unwrap();
+                while guard.samples.len() < maxid {
+                    let sample = receiver.recv().ok().flatten();
+                    if let Some(sample) = sample {
+                        guard.push(sample);
+                        pb.inc(1);
+                    } else {
+                        break;
+                    }
+                }
+            })
+        };
+        let rx_exited = Arc::new(AtomicBool::new(false));
+        // Generator thread: starts one sampling task per interval until
+        // `maxid` tasks have been started or the receiver has exited.
+        let generator_handle = {
+            let rxe = rx_exited.clone();
+            let p = self.self_ref.upgrade().unwrap();
+            std::thread::spawn(move || {
+                while id < maxid {
+                    if rxe.load(Ordering::SeqCst) {
+                        break;
+                    }
+                    let sd = sender.clone();
+                    let pp = p.clone();
+                    thread_pool.spawn_fifo(move || {
+                        let msg = match pp.do_sample_one(id) {
+                            Ok(sample) => Some(sample),
+                            Err(e) => {
+                                log::error!("{}", e);
+                                None
+                            }
+                        };
+                        // The receiver stops at the first failed sample, so it
+                        // may already be gone while other tasks are still in
+                        // flight. Their results are simply no longer wanted;
+                        // unwrapping the send error would panic inside the
+                        // pool instead.
+                        let _ = sd.send(msg);
+                    });
+
+                    id += 1;
+                    std::thread::sleep(interval);
+                }
+            })
+        };
+        rx_handle.join().unwrap();
+        rx_exited.store(true, Ordering::SeqCst);
+        generator_handle.join().unwrap();
+
+        Ok(())
+    }
+
+    /// Exports the collected samples to the output file given on the command
+    /// line, in the requested format and restricted to the requested CPUs.
+    fn save(&self) -> Result<()> {
+        self.samples.lock().unwrap().export_data(
+            self.args.format,
+            &self.args.output,
+            self.args.cpu_mask,
+        );
+        Ok(())
+    }
+
+    /// Path of the kernel image that is loaded into gdb.
+    fn kernel_path(&self) -> &PathBuf {
+        &self.args.kernel
+    }
+
+    /// Address that gdb connects to with `target remote`.
+    fn remote(&self) -> &str {
+        &self.args.remote
+    }
+
+    /// Takes one sample: runs `gdb` in batch mode against the remote target,
+    /// asks it for the backtrace of every thread, and parses its standard
+    /// output line by line.
+    ///
+    /// The exit status of `gdb` is not inspected, so a run that printed no
+    /// backtrace still yields an (empty) sample.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if `gdb` cannot be executed.
+    fn do_sample_one(&self, id: usize) -> Result<Sample> {
+        let output = Command::new("gdb")
+            .args([
+                "-batch",
+                "-ex",
+                "set pagination off",
+                "-ex",
+                "set logging file /dev/null",
+                "-ex",
+                &format!("file {}", &self.kernel_path().display()),
+                "-ex",
+                &format!("target remote {}", &self.remote()),
+                "-ex",
+                "thread apply all bt -frame-arguments presence -frame-info short-location",
+            ])
+            .output()
+            .map_err(|e| anyhow::anyhow!("[sample {}]: failed to execute gdb: {}", id, e))?;
+
+        // Milliseconds since the Unix epoch, taken after gdb has returned.
+        let timestamp = std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap()
+            .as_millis() as usize;
+        let mut sample = Sample::new(id, timestamp);
+
+        for line in String::from_utf8_lossy(&output.stdout).lines() {
+            sample.push_new_line(line);
+        }
+
+        Ok(sample)
+    }
+}
+
+/// Sample data in folded form: every distinct stack is stored once, together
+/// with the number of times it was observed.
+#[derive(Debug, Default)]
+struct FoldedSampleBuffer {
+    /// The folded sample data
+    /// key: Stack trace (separated by `;`)
+    /// value: The number of occurrences of such stack frames
+    data: HashMap<String, usize>,
+}
+impl FoldedSampleBuffer {
+    /// Parses folded-stack text, one `<stack> <count>` record per line.
+    ///
+    /// The count is the text after the last space of the line, so a stack
+    /// that itself contains spaces is accepted. Blank lines are skipped, and
+    /// the counts of a stack that appears more than once are added up.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if a non-blank line contains no space or if its count
+    /// is not a valid number.
+    pub fn try_from<T: AsRef<str>>(s: T) -> Result<Self> {
+        let mut data: HashMap<String, usize> = HashMap::new();
+
+        for line in s.as_ref().lines() {
+            let line = line.trim();
+            if line.is_empty() {
+                continue;
+            }
+
+            let (stack, count) = line
+                .rsplit_once(' ')
+                .ok_or_else(|| anyhow!("Invalid format"))?;
+            let count = count
+                .trim()
+                .parse::<usize>()
+                .map_err(|_| anyhow!("Invalid number"))?;
+
+            *data.entry(stack.trim().to_string()).or_insert(0) += count;
+        }
+
+        Ok(FoldedSampleBuffer { data })
+    }
+}
+
+/// Formats the buffer as folded-stack text: one `<stack> <count>` record per
+/// line, lines separated by `\n`, without a trailing newline.
+///
+/// `Display` is implemented rather than `ToString`; `to_string()` remains
+/// available through the standard blanket implementation.
+impl std::fmt::Display for FoldedSampleBuffer {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        for (idx, (stack, count)) in self.data.iter().enumerate() {
+            if idx > 0 {
+                f.write_str("\n")?;
+            }
+            write!(f, "{} {}", stack, count)?;
+        }
+        Ok(())
+    }
+}
+
+/// Expands folded data back into individual samples: a stack with count `n`
+/// becomes `n` identical samples.
+///
+/// The folded form carries no CPU, id or timestamp information, so every
+/// stack is attributed to CPU 0 and every sample gets id 0 and timestamp 0.
+/// Re-exporting such data with a CPU mask that excludes CPU 0 therefore
+/// filters out every stack.
+///
+/// `From` is implemented rather than `Into`; `folded.into()` keeps working
+/// through the standard blanket implementation.
+impl From<FoldedSampleBuffer> for SampleBuffer {
+    fn from(folded: FoldedSampleBuffer) -> Self {
+        let mut samples = SampleBuffer::new();
+        for (stack, count) in folded.data {
+            let mut sample = Sample::new(0, 0);
+            // `SampleBuffer::fold` reverses the frames before joining them, so
+            // reverse them again to restore the order stored in `Sample`.
+            // The frames are stored directly: they are plain function names,
+            // not backtrace lines, so the line parser would discard them.
+            let frames: Vec<String> = stack.split(';').rev().map(str::to_string).collect();
+            sample.data.insert(0, frames);
+            for _ in 0..count {
+                samples.push(sample.clone());
+            }
+        }
+        samples
+    }
+}
+
+/// Removes content within angle brackets from the input string.
+///
+/// This function iterates through each character in the input string and
+/// removes any characters that are inside angle brackets (`<` and `>`),
+/// together with the brackets themselves. Nested brackets are handled by
+/// maintaining a count of open brackets. Characters outside of any brackets
+/// are added to the result.
+///
+/// Two cases get special treatment:
+///
+/// * The `>` of an arrow (`->`) is not a closing bracket. It is handled like
+///   any other character, so `Foo<fn(usize) -> Option<usize>>` is removed as
+///   one balanced group.
+/// * A `>` without a matching `<` is dropped and leaves the bracket count at
+///   zero, so the text that follows it is still kept.
+///
+/// # Arguments
+///
+/// * `input` - A string slice that holds the input string to be processed.
+///
+/// # Returns
+///
+/// A new `String` with the content inside angle brackets removed.
+fn remove_angle_bracket_content(input: &str) -> String {
+    let mut result = String::with_capacity(input.len());
+    let mut depth: usize = 0;
+    let mut prev: Option<char> = None;
+
+    for c in input.chars() {
+        let is_arrow_head = c == '>' && prev == Some('-');
+        prev = Some(c);
+
+        match c {
+            '<' => depth += 1,
+            // Saturate so that an unmatched `>` cannot push the count below zero.
+            '>' if !is_arrow_head => depth = depth.saturating_sub(1),
+            // TODO: make the nesting level adjustable, so that more detailed
+            // information can be printed?
+            _ if depth == 0 => result.push(c),
+            _ => {}
+        }
+    }
+
+    result
+}
+
+/// Returns `input` with every match of `GUEST_ADDRESS_HEX_PATTERN` deleted.
+fn remove_guest_address(input: &str) -> String {
+    let stripped = GUEST_ADDRESS_HEX_PATTERN.replace_all(input, "");
+    stripped.into_owned()
+}
+
+/// Returns `input` with every match of `RUST_IMPL_PATTERN` deleted.
+fn remove_rust_impl_pattern(input: &str) -> String {
+    let stripped = RUST_IMPL_PATTERN.replace_all(input, "");
+    stripped.into_owned()
+}
+
+// Unit tests for the line-cleaning helpers and for parsing a complete
+// backtrace dump into a `Sample`.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Input without brackets is returned unchanged.
+    #[test]
+    fn test_remove_angle_bracket_content_no_brackets() {
+        let input = "Hello, World!";
+        let expected = "Hello, World!";
+        assert_eq!(remove_angle_bracket_content(input), expected);
+    }
+
+    // A single bracket pair is removed together with its content.
+    #[test]
+    fn test_remove_angle_bracket_content_single_pair() {
+        let input = "Hello <World>!";
+        let expected = "Hello !";
+        assert_eq!(remove_angle_bracket_content(input), expected);
+    }
+
+    // Each pair is removed on its own; the space between the pairs stays.
+    #[test]
+    fn test_remove_angle_bracket_content_multiple_pairs() {
+        let input = "Hello <World> <Again>!";
+        let expected = "Hello  !";
+        assert_eq!(remove_angle_bracket_content(input), expected);
+    }
+
+    // Nested pairs are removed as one group.
+    #[test]
+    fn test_remove_angle_bracket_content_nested_brackets() {
+        let input = "Hello <W<or>ld>!";
+        let expected = "Hello !";
+        assert_eq!(remove_angle_bracket_content(input), expected);
+    }
+    // An opening bracket that is never closed removes the rest of the input.
+    #[test]
+    fn test_remove_angle_bracket_content_unmatched_brackets() {
+        let input = "Hello <World!";
+        let expected = "Hello ";
+        assert_eq!(remove_angle_bracket_content(input), expected);
+    }
+
+    // The `::{impl#37}` segment is removed; the rest of the line is untouched.
+    #[test]
+    fn test_rust_impl_pattern() {
+        let line = "#2  alloc::sync::{impl#37}::drop<dragonos_kernel::process::ProcessControlBlock, alloc::alloc::Global> (...)";
+        let expected: &str = "#2  alloc::sync::drop<dragonos_kernel::process::ProcessControlBlock, alloc::alloc::Global> (...)";
+        assert_eq!(remove_rust_impl_pattern(line), expected);
+    }
+
+    // The `0x... in` prefix is removed; the space that followed it stays.
+    #[test]
+    fn test_guest_address_hex_pattern() {
+        let line = "#7  0xffff800001080320 in _ZN15dragonos_kernel4arch6x86_647process5table11TSS_MANAGER17hfcb0efdd9e498178E.llvm.3349419859655245662 ()";
+        let expected = "#7   _ZN15dragonos_kernel4arch6x86_647process5table11TSS_MANAGER17hfcb0efdd9e498178E.llvm.3349419859655245662 ()";
+        assert_eq!(remove_guest_address(line), expected);
+    }
+
+    // Parses a dump with two threads (CPU#1 with 27 frames, CPU#0 with 8
+    // frames) and checks the per-CPU frame counts and the innermost frames.
+    #[test]
+    fn test_profile_parse_one_sample() {
+        let stack = r#"
+Thread 2 (Thread 1.2 (CPU#1 [halted ])):
+#0  core::ptr::non_null::NonNull<alloc::sync::ArcInner<dragonos_kernel::process::ProcessControlBlock>>::as_ref<alloc::sync::ArcInner<dragonos_kernel::process::ProcessControlBlock>> (...)
+#1  alloc::sync::Arc<dragonos_kernel::process::ProcessControlBlock, alloc::alloc::Global>::inner<dragonos_kernel::process::ProcessControlBlock, alloc::alloc::Global> (...)
+#2  alloc::sync::{impl#37}::drop<dragonos_kernel::process::ProcessControlBlock, alloc::alloc::Global> (...)
+#3  core::ptr::drop_in_place<alloc::sync::Arc<dragonos_kernel::process::ProcessControlBlock, alloc::alloc::Global>> ()
+#4  dragonos_kernel::process::ProcessManager::arch_idle_func ()
+#5  0xffff80001ff94800 in ?? ()
+#6  0x0000000000020097 in ?? ()
+#7  0xffff800001080320 in _ZN15dragonos_kernel4arch6x86_647process5table11TSS_MANAGER17hfcb0efdd9e498178E.llvm.3349419859655245662 ()
+#8  0x00000000f7b82223 in ?? ()
+#9  0x00000000178bfbff in ?? ()
+#10 0x0000000001020800 in ?? ()
+#11 0x0000000000000096 in ?? ()
+#12 0xffff80001ff94c28 in ?? ()
+#13 0x0000000000000010 in ?? ()
+#14 0x0000000000000010 in ?? ()
+#15 0x00000000000306a9 in ?? ()
+#16 0x00000000000306a9 in ?? ()
+#17 0xffff800001080320 in _ZN15dragonos_kernel4arch6x86_647process5table11TSS_MANAGER17hfcb0efdd9e498178E.llvm.3349419859655245662 ()
+#18 0xffff80001ff94c38 in ?? ()
+#19 0xffff80001ff8bf58 in ?? ()
+#20 0xffff80001ff8bf50 in ?? ()
+#21 0xffff80001ff8bf88 in ?? ()
+#22 0xffff80001ff94c28 in ?? ()
+#23 0xffff8000001e196a in dragonos_kernel::smp::init::smp_ap_start_stage2 ()
+#24 0x0000000000000001 in ?? ()
+#25 0xffff800000182638 in dragonos_kernel::arch::x86_64::smp::smp_ap_start_stage1 ()
+#26 0x0000000000000000 in ?? ()
+
+Thread 1 (Thread 1.1 (CPU#0 [running])):
+#0  core::sync::atomic::AtomicUsize::fetch_update<fn(usize) -> core::option::Option<usize>> (...)
+#1  alloc::sync::Weak<dragonos_kernel::process::ProcessControlBlock, alloc::alloc::Global>::upgrade<dragonos_kernel::process::ProcessControlBlock, alloc::alloc::Global> (...)
+#2  dragonos_kernel::process::ProcessControlBlock::arch_current_pcb ()
+#3  dragonos_kernel::process::ProcessManager::current_pcb ()
+#4  0xffff80001f988de8 in ?? ()
+#5  0xffff80001f988de8 in ?? ()
+#6  0xffff80001f988dd0 in ?? ()
+#7  0x0000000000000000 in ?? ()
+        "#;
+        let mut sample = Sample::new(0, 0);
+        for line in stack.lines() {
+            sample.push_new_line(line);
+        }
+        assert_eq!(sample.vcpu_count(), 2);
+        assert_eq!(sample.data.get(&0).unwrap().len(), 8);
+        assert_eq!(sample.data.get(&1).unwrap().len(), 27);
+
+        assert_eq!(
+            sample.data.get(&0).unwrap()[0],
+            "core::sync::atomic::AtomicUsize::fetch_update"
+        );
+        assert_eq!(
+            sample.data.get(&1).unwrap()[0],
+            "core::ptr::non_null::NonNull::as_ref"
+        );
+        println!("{:?}", sample);
+    }
+}

+ 15 - 0
dadk/src/console/mod.rs

@@ -1,7 +1,9 @@
 use clap::{Parser, Subcommand};
+use profile::ProfileCommand;
 use rootfs::RootFSCommand;
 use user::UserCommand;
 
+pub mod profile;
 pub mod rootfs;
 #[cfg(test)]
 mod tests;
@@ -38,4 +40,17 @@ pub enum Action {
     /// 用户程序构建相关操作
     #[command(subcommand, name = "user")]
     User(UserCommand),
+
+    #[command(subcommand, name = "profile")]
+    Profile(ProfileCommand),
+}
+
+impl Action {
+    /// Whether the manifest file has to be read when dadk starts.
+    ///
+    /// This is the case for every action except `Profile`.
+    pub fn needs_manifest(&self) -> bool {
+        !matches!(self, Action::Profile(_))
+    }
 }

+ 167 - 0
dadk/src/console/profile.rs

@@ -0,0 +1,167 @@
+use std::{path::PathBuf, time::Duration};
+
+use anyhow::{anyhow, Result};
+use clap::{Parser, Subcommand};
+
+// Subcommands of `dadk profile`.
+// (Plain comments are used on purpose: doc comments on clap-derived items
+// are picked up as help text.)
+#[derive(Debug, Subcommand, Clone, PartialEq, Eq)]
+pub enum ProfileCommand {
+    // Collect samples and export them.
+    #[clap(about = "Sample the kernel")]
+    Sample(ProfileSampleArgs),
+    // Convert previously collected sample data to another format.
+    #[clap(about = "Parse the collected sample data")]
+    Parse(ProfileParseArgs),
+}
+
+// Arguments of `dadk profile sample`.
+// (Plain comments are used on purpose: doc comments on clap-derived items
+// are picked up as help text.)
+#[derive(Debug, Parser, Clone, PartialEq, Eq)]
+pub struct ProfileSampleArgs {
+    #[clap(
+        long = "kernel",
+        help = "Path to the kernel image to use",
+        default_value = "./bin/kernel/kernel.elf"
+    )]
+    pub kernel: PathBuf,
+    // Read through `interval()`.
+    #[clap(
+        long = "interval",
+        help = "Interval between samples (e.g., 200ms, 1s, 1m)",
+        default_value = "200ms",
+        value_parser = parse_time_interval
+    )]
+    interval: Duration,
+
+    // Read through `duration()`. The value goes through the same parser as
+    // `--interval`, so the help text shows the accepted form instead of
+    // claiming a plain number of seconds.
+    #[clap(
+        long = "duration",
+        help = "Duration of the sampling (e.g., 10s, 1m)",
+        default_value = "10s",
+        value_parser = parse_time_interval
+    )]
+    duration: Duration,
+    #[clap(long = "output", help = "Path of the output file")]
+    pub output: PathBuf,
+
+    #[clap(
+        long = "format",
+        help = "Output file format (flamegraph, json, folded)",
+        default_value = "flamegraph",
+        value_parser = parse_profile_file_type
+    )]
+    pub format: ProfileFileType,
+
+    #[clap(
+        long = "remote",
+        help = "Remote address to connect to",
+        default_value = "localhost:1234"
+    )]
+    pub remote: String,
+
+    #[clap(
+        long = "workers",
+        help = "Number of worker threads to use",
+        default_value = "3"
+    )]
+    pub workers: usize,
+    // Bit mask; accepted as decimal or as hexadecimal with a `0x`/`0X` prefix.
+    #[clap(
+        long = "cpu-mask",
+        help = "CPU mask to filter",
+        value_parser = parse_cpu_mask
+    )]
+    pub cpu_mask: Option<u128>,
+}
+
+impl ProfileSampleArgs {
+    /// Value of `--interval`.
+    pub fn interval(&self) -> Duration {
+        let Self { interval, .. } = self;
+        *interval
+    }
+
+    /// Value of `--duration`.
+    pub fn duration(&self) -> Duration {
+        let Self { duration, .. } = self;
+        *duration
+    }
+}
+
+/// Parses a time span written in `humantime` syntax (e.g. `200ms`, `1s`, `1m`).
+///
+/// Used as the clap value parser for `--interval` and `--duration`.
+fn parse_time_interval(interval: &str) -> Result<Duration> {
+    match interval.parse::<humantime::Duration>() {
+        Ok(parsed) => Ok(parsed.into()),
+        Err(e) => Err(anyhow!(
+            "Failed to parse interval: {}, error: {}",
+            interval,
+            e
+        )),
+    }
+}
+
+/// Maps a format name to a `ProfileFileType`.
+///
+/// Surrounding whitespace is ignored and the comparison is case-insensitive.
+/// An unknown name is reported with the text exactly as it was given.
+fn parse_profile_file_type(format: &str) -> Result<ProfileFileType> {
+    let normalized = format.trim().to_ascii_lowercase();
+    let file_type = match normalized.as_str() {
+        "flamegraph" => ProfileFileType::Flamegraph,
+        "folded" => ProfileFileType::Folded,
+        "json" => ProfileFileType::Json,
+        _ => return Err(anyhow!("Unknown profile file type: {}", format)),
+    };
+    Ok(file_type)
+}
+
+/// Parses a CPU mask: hexadecimal when prefixed with `0x` or `0X`, decimal
+/// otherwise.
+fn parse_cpu_mask(s: &str) -> Result<u128> {
+    let hex_digits = s.strip_prefix("0x").or_else(|| s.strip_prefix("0X"));
+    let parsed = match hex_digits {
+        Some(digits) => u128::from_str_radix(digits, 16),
+        None => s.parse::<u128>(),
+    };
+
+    parsed.map_err(|e| anyhow!("Failed to parse cpu mask: {}, error: {}", s, e))
+}
+
+// Arguments of `dadk profile parse`.
+// (Plain comments are used on purpose: doc comments on clap-derived items
+// are picked up as help text.)
+#[derive(Debug, Parser, Clone, PartialEq, Eq)]
+pub struct ProfileParseArgs {
+    // NOTE(review): this field is private, has no accessor and is not read
+    // anywhere in this module; the option is accepted but has no effect.
+    // The value goes through the same parser as the sampling options, so the
+    // help text shows the accepted form instead of claiming plain seconds.
+    #[clap(
+        long = "duration",
+        help = "Duration of the sampling (e.g., 10s, 1m)",
+        default_value = "10s",
+        value_parser = parse_time_interval
+    )]
+    duration: Duration,
+    #[clap(long = "input", help = "Path of the input file")]
+    pub input: PathBuf,
+
+    #[clap(long = "output", help = "Path of the output file")]
+    pub output: PathBuf,
+
+    #[clap(
+        long = "format",
+        help = "Output file format (flamegraph, json, folded)",
+        default_value = "flamegraph",
+        value_parser = parse_profile_file_type
+    )]
+    pub format: ProfileFileType,
+
+    // Bit mask; accepted as decimal or as hexadecimal with a `0x`/`0X` prefix.
+    #[clap(
+        long = "cpu-mask",
+        help = "CPU mask to filter",
+        value_parser = parse_cpu_mask
+    )]
+    pub cpu_mask: Option<u128>,
+}
+
+/// Type of the output file
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ProfileFileType {
+    /// JSON format
+    Json,
+    /// Folded stack format
+    Folded,
+    /// Flame graph
+    Flamegraph,
+}
+
+// Unit tests for the command line value parsers.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Millisecond, second and minute suffixes are accepted.
+    #[test]
+    fn test_parse_time_interval() {
+        assert_eq!(
+            parse_time_interval("1ms").unwrap(),
+            Duration::from_millis(1)
+        );
+        assert_eq!(parse_time_interval("1s").unwrap(), Duration::from_secs(1));
+        assert_eq!(parse_time_interval("1m").unwrap(), Duration::from_secs(60));
+    }
+
+    // Decimal and `0x`-prefixed hexadecimal masks are accepted.
+    #[test]
+    fn test_parse_cpu_mask() {
+        assert_eq!(parse_cpu_mask("1").unwrap(), 1);
+        assert_eq!(parse_cpu_mask("0x1").unwrap(), 1);
+    }
+}

+ 1 - 1
dadk/src/context/manifest.rs

@@ -19,6 +19,6 @@ pub(super) fn parse_manifest(builder: &mut DADKExecContextBuilder) -> Result<()>
         return Err(anyhow!("Manifest path does not exist or is not a file"));
     }
     let dadk_manifest_file = DadkManifestFile::load(&manifest_path)?;
-    builder.manifest = Some(dadk_manifest_file);
+    builder.manifest = Some(Some(dadk_manifest_file));
     Ok(())
 }

+ 16 - 8
dadk/src/context/mod.rs

@@ -20,7 +20,7 @@ mod manifest;
 pub struct DADKExecContext {
     pub command: CommandLineArgs,
     /// DADK manifest file
-    pub manifest: DadkManifestFile,
+    manifest: Option<DadkManifestFile>,
 
     /// RootFS config file
     rootfs: OnceCell<RootFSConfigFile>,
@@ -30,8 +30,12 @@ pub fn build_exec_context() -> Result<DADKExecContext> {
     let mut builder = DADKExecContextBuilder::create_empty();
     builder.command(CommandLineArgs::parse());
     builder.rootfs(OnceCell::new());
-    parse_manifest(&mut builder).expect("Failed to parse manifest");
-    let ctx = builder.build()?;
+    if builder.command.as_ref().unwrap().action.needs_manifest() {
+        parse_manifest(&mut builder).expect("Failed to parse manifest");
+    } else {
+        builder.manifest(None);
+    }
+    let ctx: DADKExecContext = builder.build()?;
     ctx.setup_workdir().expect("Failed to setup workdir");
     Ok(ctx)
 }
@@ -50,16 +54,20 @@ impl DADKExecContext {
     /// Get rootfs configuration
     pub fn rootfs(&self) -> &RootFSConfigFile {
         self.rootfs.get_or_init(|| {
-            RootFSConfigFile::load(&self.manifest.metadata.rootfs_config)
+            RootFSConfigFile::load(&self.manifest().metadata.rootfs_config)
                 .expect("Failed to load rootfs config")
         })
     }
 
+    /// Get the DADK manifest
+    ///
+    /// # Panics
+    ///
+    /// Panics if no manifest is stored in this context.
+    pub fn manifest(&self) -> &DadkManifestFile {
+        self.manifest
+            .as_ref()
+            .expect("DADK manifest is not loaded in this context")
+    }
+
     /// Get sysroot directory
     ///
     /// If the directory does not exist, or the path is not a folder, an error is returned
     pub fn sysroot_dir(&self) -> Result<PathBuf> {
-        check_dir_exists(&self.manifest.metadata.sysroot_dir)
+        check_dir_exists(&self.manifest().metadata.sysroot_dir)
             .map(|p| p.clone())
             .map_err(|e| anyhow::anyhow!("Failed to get sysroot dir: {}", e))
     }
@@ -68,20 +76,20 @@ impl DADKExecContext {
     ///
     /// If the directory does not exist, or the path is not a folder, an error is returned
     pub fn cache_root_dir(&self) -> Result<PathBuf> {
-        check_dir_exists(&self.manifest.metadata.cache_root_dir)
+        check_dir_exists(&self.manifest().metadata.cache_root_dir)
             .map(|p| p.clone())
             .map_err(|e| anyhow::anyhow!("Failed to get cache root dir: {}", e))
     }
 
     #[deprecated]
     pub fn user_config_dir(&self) -> Result<PathBuf> {
-        check_dir_exists(&self.manifest.metadata.user_config_dir)
+        check_dir_exists(&self.manifest().metadata.user_config_dir)
             .map(|p| p.clone())
             .map_err(|e| anyhow::anyhow!("Failed to get user config dir: {}", e))
     }
 
     pub fn target_arch(&self) -> TargetArch {
-        self.manifest.metadata.arch
+        self.manifest().metadata.arch
     }
 
     /// 获取磁盘镜像的路径,路径由工作目录、架构和固定文件名组成

+ 1 - 0
docs/.vuepress/config.js

@@ -49,6 +49,7 @@ export default defineUserConfig({
                         text: '用户指南',
                         children: [
                             '/user-manual/quickstart.md',
+                            '/user-manual/profiling.md',
                             
                         ]
                     }

+ 1 - 0
docs/user-manual/README.md

@@ -6,3 +6,4 @@
 ## 目录
 
 - [Quick Start - 快速开始!](./quickstart.md)
+- [对DragonOS内核进行性能分析](./profiling.md)

+ 95 - 0
docs/user-manual/profiling.md

@@ -0,0 +1,95 @@
+# 对DragonOS内核进行性能分析
+
+## 1. 概述
+
+本文将教你使用DADK,对DragonOS内核进行性能分析,以识别和解决潜在的性能瓶颈。
+
+### 1.1 准备工作
+
+::: tip
+在开始之前,请确保你已经安装了DADK,并且已经配置好了DragonOS内核的编译环境。
+:::
+
+### 1.2 什么是火焰图?
+
+如果你没有听说过火焰图,可以先阅读这篇文章:[《如何读懂火焰图?- 阮一峰》](https://www.ruanyifeng.com/blog/2017/09/flame-graph.html)
+
+简单的说,火焰图是基于性能采样结果产生的 SVG 图片,用来展示 CPU 的调用栈。
+
+![](https://web-static2.dragonos.org.cn//longjin/202411252121491.png?imageSlim)
+
+x 轴表示抽样数,如果一个函数在 x 轴占据的宽度越宽,就表示它被抽到的次数多,即执行的时间长。注意,x 轴不代表时间,而是所有的调用栈合并后,按字母顺序排列的。
+
+火焰图就是看顶层的哪个函数占据的宽度最大。只要有"平顶"(plateaus),就表示该函数可能存在性能问题。
+
+颜色没有特殊含义,因为火焰图表示的是 CPU 的繁忙程度,所以一般选择暖色调。
+
+## 2. 配置DragonOS内核
+
+由于性能分析需要详尽的符号表数据,因此在编译内核时,需要进行以下配置:
+
+在`kernel/Cargo.toml`中的`[profile.release]`部分,设置以下两项:
+
+```toml
+[profile.release]
+debug = true
+opt-level = 1
+```
+
+这样,编译出来的内核就会包含符号表数据,方便我们进行性能分析。
+
+## 3. 使用DADK进行性能分析
+
+### 3.1 启动内核
+
+首先,我们需要启动DragonOS内核。
+
+```shell
+# 使用你喜欢的方式启动内核,例如:
+make run
+# 或者
+make build && make qemu-nographic
+```
+
+### 3.2 运行你的工作负载
+
+在启动内核后,我们需要运行一些工作负载,以便进行性能分析。
+
+这可以是一个应用程序,也可以是别的东西。甚至你可以什么都不运行,只是单纯看看DragonOS内核在空闲时的调用栈情况。
+
+### 3.3 启动DADK进行性能分析
+
+在DragonOS项目目录下,运行以下命令:
+
+```shell
+dadk profile sample --format flamegraph  --output flame.svg --interval 200ms --duration 20s  --cpu-mask 0x1
+```
+
+上面的命令,将会对DragonOS内核进行性能分析,并生成一个火焰图。
+
+详细解释:
+
+- `--format flamegraph`:指定输出格式为火焰图。
+- `--output flame.svg`:指定输出文件名为`flame.svg`。
+- `--interval 200ms`:指定采样间隔为200ms。
+- `--duration 20s`:指定采样时间为20s。
+- `--cpu-mask 0x1`:指定采样的CPU为0号CPU。(这是个按位掩码,也就是说,如果要采样0和1号CPU,那么cpu-mask为0x3)
+
+*更多参数请参考`dadk profile sample --help`.*
+
+::: tip
+由于采样时会暂停vCPU,因此采样间隔不宜过短,否则会影响系统的正常运行。
+:::
+
+经过一段时间的等待,你将会得到一个`flame.svg`文件。
+
+### 3.4 分析火焰图
+
+使用浏览器打开`flame.svg`文件,你将会看到一个火焰图。
+
+你可以通过点击火焰图中的某个函数,来查看它的调用栈。
+
+**你可以右键下面的图片,在新的标签页打开,体验交互效果。**
+
+![](https://web-static2.dragonos.org.cn//longjin/flame2.svg?imageSlim)
+