Browse Source

feat(kexec & initram):Add kexec and initram support for x86 architecture (#1303)

- Support embedding an initram image and using ramfs as the file system into which the initram is extracted
- Support the kexec family of system calls, including the load calls and reboot
- Support booting with u-root (written in Go) as the root file system
- Add sysfs entries such as boot_params and memmap
- Add a series of auxiliary system calls needed by the above

Signed-off-by: JensenWei007 <jensenwei007@gmail.com>
JingXuan_Wei 1 day ago
parent
commit
bb99d3f6ed
56 changed files with 3353 additions and 27 deletions
  1. 1 0
      build-scripts/kernel_build/src/cfiles/arch/x86_64.rs
  2. 4 0
      kernel/.gitignore
  3. 17 0
      kernel/Cargo.lock
  4. 6 0
      kernel/Cargo.toml
  5. 9 0
      kernel/build.rs
  6. 3 0
      kernel/initram/create_initram.sh
  7. 17 0
      kernel/src/arch/loongarch64/init/mod.rs
  8. 11 0
      kernel/src/arch/loongarch64/kexec.rs
  9. 2 0
      kernel/src/arch/loongarch64/mod.rs
  10. 15 0
      kernel/src/arch/riscv64/init/dragonstub.rs
  11. 17 0
      kernel/src/arch/riscv64/init/mod.rs
  12. 11 0
      kernel/src/arch/riscv64/kexec.rs
  13. 3 0
      kernel/src/arch/riscv64/mod.rs
  14. 192 0
      kernel/src/arch/x86_64/asm/relocate_kernel_64.S
  15. 454 0
      kernel/src/arch/x86_64/init/boot_params.rs
  16. 2 5
      kernel/src/arch/x86_64/init/mod.rs
  17. 39 0
      kernel/src/arch/x86_64/init/multiboot2.rs
  18. 74 0
      kernel/src/arch/x86_64/init/pvh/mod.rs
  19. 25 10
      kernel/src/arch/x86_64/ipc/signal.rs
  20. 172 0
      kernel/src/arch/x86_64/kexec.rs
  21. 6 0
      kernel/src/arch/x86_64/link.lds
  22. 3 0
      kernel/src/arch/x86_64/mod.rs
  23. 62 0
      kernel/src/filesystem/ramfs/mod.rs
  24. 17 0
      kernel/src/filesystem/vfs/file.rs
  25. 75 0
      kernel/src/filesystem/vfs/mod.rs
  26. 18 0
      kernel/src/filesystem/vfs/mount.rs
  27. 1 0
      kernel/src/filesystem/vfs/syscall/mod.rs
  28. 83 0
      kernel/src/filesystem/vfs/syscall/sys_mknodat.rs
  29. 17 0
      kernel/src/filesystem/vfs/vcore.rs
  30. 236 0
      kernel/src/init/boot.rs
  31. 20 2
      kernel/src/init/initial_kthread.rs
  32. 284 0
      kernel/src/init/initram.rs
  33. 261 0
      kernel/src/init/kexec/kexec_core.rs
  34. 82 0
      kernel/src/init/kexec/mod.rs
  35. 63 0
      kernel/src/init/kexec/syscall.rs
  36. 13 0
      kernel/src/init/mod.rs
  37. 25 0
      kernel/src/ipc/signal_types.rs
  38. 2 0
      kernel/src/ipc/syscall/mod.rs
  39. 91 0
      kernel/src/ipc/syscall/sys_pidfd_sendsignal.rs
  40. 100 0
      kernel/src/ipc/syscall/sys_sigaltstack.rs
  41. 32 0
      kernel/src/libs/decompress.rs
  42. 2 0
      kernel/src/libs/mod.rs
  43. 2 2
      kernel/src/misc/reboot.rs
  44. 151 0
      kernel/src/mm/ident_map.rs
  45. 2 0
      kernel/src/mm/mod.rs
  46. 348 0
      kernel/src/mm/sysfs.rs
  47. 44 0
      kernel/src/process/exit.rs
  48. 38 0
      kernel/src/process/fork.rs
  49. 17 1
      kernel/src/process/mod.rs
  50. 19 0
      kernel/src/process/pid.rs
  51. 2 0
      kernel/src/process/syscall/mod.rs
  52. 16 0
      kernel/src/process/syscall/sys_execve.rs
  53. 74 0
      kernel/src/process/syscall/sys_pidfdopen.rs
  54. 67 0
      kernel/src/process/syscall/sys_waitid.rs
  55. 6 2
      kernel/src/syscall/misc.rs
  56. 0 5
      kernel/src/syscall/mod.rs

+ 1 - 0
build-scripts/kernel_build/src/cfiles/arch/x86_64.rs

@@ -14,6 +14,7 @@ impl CFilesArch for X86_64CFilesArch {
         files.insert(PathBuf::from("src/arch/x86_64/asm/head.S"));
         files.insert(PathBuf::from("src/arch/x86_64/asm/entry.S"));
         files.insert(PathBuf::from("src/arch/x86_64/asm/apu_boot.S"));
+        files.insert(PathBuf::from("src/arch/x86_64/asm/relocate_kernel_64.S"));
         files.insert(PathBuf::from("src/arch/x86_64/vm/vmx/vmenter.S"));
     }
 

+ 4 - 0
kernel/.gitignore

@@ -10,3 +10,7 @@ src/include/bindings/bindings.h
 
 # Build counter
 .build_count
+
+# initram
+initram/*.cpio
+initram/*.cpio.xz

+ 17 - 0
kernel/Cargo.lock

@@ -245,6 +245,15 @@ version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
 
+[[package]]
+name = "cpio_reader"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5793d74f50cf1460a969f028d40963d8d6e01ebae049dea976e944335c7df492"
+dependencies = [
+ "bitflags 2.9.1",
+]
+
 [[package]]
 name = "crc"
 version = "0.1.0"
@@ -424,6 +433,7 @@ dependencies = [
  "bitmap",
  "byte-slice-cast",
  "cfg-if",
+ "cpio_reader",
  "defer",
  "derive_builder",
  "driver_base_macros",
@@ -466,6 +476,7 @@ dependencies = [
  "wait_queue_macros",
  "x86",
  "x86_64",
+ "xz4rust",
 ]
 
 [[package]]
@@ -2087,6 +2098,12 @@ dependencies = [
  "smallvec",
 ]
 
+[[package]]
+name = "xz4rust"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1053c1562c16e3760559c390d3b690ae6b524c200eb9a7ddb5d43c77d000832d"
+
 [[package]]
 name = "yaxpeax-arch"
 version = "0.3.2"

+ 6 - 0
kernel/Cargo.toml

@@ -28,6 +28,9 @@ static_keys_test = []
 # kstack_protect 开启该功能后,会开启内核栈保护功能。用于辅助检测栈溢出。(内核栈占用会*2)
 kstack_protect = []
 
+# initram
+initram = []
+
 # 运行时依赖项
 [dependencies]
 acpi = { git = "https://git.mirrors.dragonos.org.cn/DragonOS-Community/acpi-rs.git", rev = "282df2af7b" }
@@ -79,6 +82,9 @@ printf-compat = { git = "https://git.mirrors.dragonos.org.cn/DragonOS-Community/
 
 static-keys = { version = "=0.7" }
 
+cpio_reader = "0.1.2"
+xz4rust = { version = "0.2.1", default-features = false, features = ["alloc"] }
+
 defer = "0.2.1"
 cfg-if = { version = "1.0.0" }
 derive_builder = { version = "0.20.2", default-features = false, features = [

+ 9 - 0
kernel/build.rs

@@ -1,3 +1,12 @@
+use std::env;
+use std::path::Path;
+
 fn main() {
+    // Emit the `has_initram_x86` cfg only when a packed initram image exists
+    // at `<manifest dir>/initram/x86.cpio.xz`.
+    let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
+    let initram_image = Path::new(&manifest_dir).join("initram").join("x86.cpio.xz");
+    if initram_image.exists() {
+        println!("cargo:rustc-cfg=has_initram_x86");
+    }
+
     kernel_build::run();
 }

+ 3 - 0
kernel/initram/create_initram.sh

@@ -0,0 +1,3 @@
+#!/bin/bash
+
+echo "此脚本暂时废置, 请使用 DragonBoot 仓库进行构建"

+ 17 - 0
kernel/src/arch/loongarch64/init/mod.rs

@@ -8,6 +8,23 @@ pub struct ArchBootParams {}
 
 impl ArchBootParams {
     pub const DEFAULT: Self = ArchBootParams {};
+
+    /// No-op: the value is accepted and discarded.
+    pub fn set_alt_mem_k(&mut self, _alt_mem_k: u32) {}
+
+    /// No-op: the value is accepted and discarded.
+    pub fn set_scratch(&mut self, _scratch: u32) {}
+
+    /// No-op: the entry is accepted and discarded.
+    pub fn add_e820_entry(&mut self, _addr: u64, _size: u64, _mtype: u32) {}
+
+    /// No-op.
+    pub fn init_setupheader(&mut self) {}
+
+    /// Returns the in-memory representation of `self` as a byte slice of
+    /// `size_of::<Self>()` bytes.
+    pub fn convert_to_buf(&self) -> &[u8] {
+        let base = (self as *const Self).cast::<u8>();
+        let len = core::mem::size_of::<Self>();
+        // SAFETY: `base` comes from `&self` and `len` is exactly the size of `Self`,
+        // so the range stays inside one object that is borrowed for the lifetime of
+        // the returned slice.
+        unsafe { core::slice::from_raw_parts(base, len) }
+    }
 }
 
 #[inline(never)]

+ 11 - 0
kernel/src/arch/loongarch64/kexec.rs

@@ -0,0 +1,11 @@
+use crate::init::kexec::Kimage;
+use crate::libs::spinlock::SpinLock;
+use alloc::rc::Rc;
+
+/// Stub: ignores the image and always returns `false`.
+///
+/// NOTE(review): `false` presumably tells the caller that kexec cannot be
+/// prepared on this architecture; confirm against the call site.
+pub fn machine_kexec_prepare(_kimage: Rc<SpinLock<Kimage>>) -> bool {
+    false
+}
+
+/// Stub with an empty body; the image is ignored.
+pub fn init_pgtable(_kimage: Rc<SpinLock<Kimage>>) {}
+
+/// Stub with an empty body; the image is ignored.
+pub fn machine_kexec(_kimage: Rc<SpinLock<Kimage>>) {}

+ 2 - 0
kernel/src/arch/loongarch64/mod.rs

@@ -5,6 +5,7 @@ pub mod filesystem;
 pub mod init;
 pub mod interrupt;
 pub mod ipc;
+pub mod kexec;
 pub mod kprobe;
 pub mod mm;
 pub mod msi;
@@ -27,6 +28,7 @@ pub use self::pio::LoongArch64PortIOArch as CurrentPortIOArch;
 pub use self::sched::LoongArch64SchedArch as CurrentSchedArch;
 pub use self::smp::LoongArch64SMPArch as CurrentSMPArch;
 pub use self::time::LoongArch64TimeArch as CurrentTimeArch;
+pub use crate::arch::kexec as KexecArch;
 
 pub fn panic_pre_work() {}
 pub fn panic_post_work() {}

+ 15 - 0
kernel/src/arch/riscv64/init/dragonstub.rs

@@ -38,4 +38,19 @@ impl BootCallbacks for DragonStubCallBack {
         // parsed in `early_init_scan_memory()` and uefi driver
         Ok(())
     }
+
+    fn early_init_memmap_sysfs(&self) -> Result<(), SystemError> { // stub: only logs an error, then reports success
+        log::error!("riscv64, early_init_memmap_sysfs is not impled");
+        Ok(())
+    }
+
+    fn init_initramfs(&self) -> Result<(), SystemError> { // stub: only logs an error, then reports success
+        log::error!("riscv64, init_initramfs is not impled");
+        Ok(())
+    }
+
+    fn init_memmap_bp(&self) -> Result<(), SystemError> { // stub: only logs an error, then reports success
+        log::error!("riscv64, init_memmap_bp is not impled");
+        Ok(())
+    }
 }

+ 17 - 0
kernel/src/arch/riscv64/init/mod.rs

@@ -45,6 +45,23 @@ impl ArchBootParams {
         }
         self.fdt_vaddr.unwrap()
     }
+
+    /// No-op: the value is accepted and discarded.
+    pub fn set_alt_mem_k(&mut self, _alt_mem_k: u32) {}
+
+    /// No-op: the value is accepted and discarded.
+    pub fn set_scratch(&mut self, _scratch: u32) {}
+
+    /// No-op: the entry is accepted and discarded.
+    pub fn add_e820_entry(&mut self, _addr: u64, _size: u64, _mtype: u32) {}
+
+    /// No-op.
+    pub fn init_setupheader(&mut self) {}
+
+    /// Returns the in-memory representation of `self` as a byte slice of
+    /// `size_of::<Self>()` bytes.
+    pub fn convert_to_buf(&self) -> &[u8] {
+        let base = (self as *const Self).cast::<u8>();
+        let len = core::mem::size_of::<Self>();
+        // SAFETY: `base` comes from `&self` and `len` is exactly the size of `Self`,
+        // so the range stays inside one object that is borrowed for the lifetime of
+        // the returned slice.
+        // NOTE(review): this also assumes `Self` has no uninitialized padding
+        // bytes; confirm against the struct definition.
+        unsafe { core::slice::from_raw_parts(base, len) }
+    }
 }
 
 static mut BOOT_HARTID: u32 = 0;

+ 11 - 0
kernel/src/arch/riscv64/kexec.rs

@@ -0,0 +1,11 @@
+use crate::init::kexec::Kimage;
+use crate::libs::spinlock::SpinLock;
+use alloc::rc::Rc;
+
+/// Stub: ignores the image and always returns `false`.
+///
+/// NOTE(review): `false` presumably tells the caller that kexec cannot be
+/// prepared on this architecture; confirm against the call site.
+pub fn machine_kexec_prepare(_kimage: Rc<SpinLock<Kimage>>) -> bool {
+    false
+}
+
+/// Stub with an empty body; the image is ignored.
+pub fn init_pgtable(_kimage: Rc<SpinLock<Kimage>>) {}
+
+/// Stub with an empty body; the image is ignored.
+pub fn machine_kexec(_kimage: Rc<SpinLock<Kimage>>) {}

+ 3 - 0
kernel/src/arch/riscv64/mod.rs

@@ -6,6 +6,7 @@ pub mod filesystem;
 pub mod init;
 pub mod interrupt;
 pub mod ipc;
+pub mod kexec;
 pub mod kprobe;
 mod kvm;
 pub mod mm;
@@ -35,6 +36,8 @@ pub use crate::arch::smp::RiscV64SMPArch as CurrentSMPArch;
 
 pub use crate::arch::sched::RiscV64SchedArch as CurrentSchedArch;
 
+pub use crate::arch::kexec as KexecArch;
+
 pub fn panic_pre_work() {
     unsafe { riscv::register::sstatus::set_fs(riscv::register::sstatus::FS::Initial) };
 }

+ 192 - 0
kernel/src/arch/x86_64/asm/relocate_kernel_64.S

@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * relocate_kernel_64.S - put the kernel image in place to boot
+ * Copyright (C) Jingxuan Wei  <jensenwei007@gmail.com>
+ */
+
+#define X86_CR4_PGE		(1ul << 7)
+#define X86_CR4_CET		(1ul << 23)
+#define X86_CR0_AM          (1UL<<18)
+#define X86_CR0_WP          (1UL<<16)
+#define X86_CR0_TS          (1UL<<3)
+#define X86_CR0_EM          (1UL<<2)
+#define X86_CR0_PG          (1UL<<31)
+#define X86_CR0_PE          (1UL<<0)
+#define X86_CR4_PAE		(1ul << 5)
+#define X86_CR4_LA57		(1ul << 12)
+
+.section .relocate_kernel_data
+.balign 16
+.globl kexec_pa_table_page
+kexec_pa_table_page: /* one quadword, initially 0; relocate_kernel loads it into %cr3 */
+    .quad 0
+    .type kexec_pa_table_page 1
+    .set .L__sym_size_kexec_pa_table_page, .-kexec_pa_table_page
+    .size kexec_pa_table_page, .L__sym_size_kexec_pa_table_page
+
+.section .relocate_kernel_text
+.code64
+.globl relocate_kernel;   
+relocate_kernel:
+	/*
+	 * %rdi indirection_page
+	 * %rsi start_address
+	 * %rdx stack_page_address
+	 */
+
+	/* zero out flags, and disable interrupts */
+	pushq $0
+	popfq
+
+    /* Switch to the identity mapped page tables */
+	movq	%cr3, %rax /* NOTE(review): %rax is overwritten below before it is ever read */
+	movq	kexec_pa_table_page(%rip), %r9
+	movq	%r9, %cr3
+
+    /* Leave CR4 in %r13 to enable the right paging mode later. */
+	movq	%cr4, %r13
+
+	/* Disable global pages immediately to ensure this mapping is RWX */
+	movq	%r13, %r12
+	andq	$~(X86_CR4_PGE), %r12
+	movq	%r12, %cr4
+
+    /* setup a new stack */
+	movq	%rdx, %rsp
+    addq    $4096, %rsp
+
+    /* store the start address on the stack */
+	pushq   %rsi
+
+    /*
+	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
+	 * below.
+	 */
+	movq	%cr4, %rax
+	andq	$~(X86_CR4_CET), %rax
+	movq	%rax, %cr4
+
+    /*
+	 * Set cr0 to a known state:
+	 *  - Paging enabled
+	 *  - Alignment check disabled
+	 *  - Write protect disabled
+	 *  - No task switch
+	 *  - Don't do FP software emulation.
+	 *  - Protected mode enabled
+	 */
+	movq	%cr0, %rax
+	andq	$~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
+	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
+	movq	%rax, %cr0
+
+    /*
+	 * Set cr4 to a known state:
+	 *  - physical address extension enabled
+	 *  - 5-level paging, if it was enabled before
+	 *
+	 * NOTE(review): the mask below keeps only PAE and LA57, so every other
+	 * CR4 bit (including machine-check enable) is cleared; nothing here
+	 * preserves MCE for TDX guests.
+	 *
+	 * Use R13 that contains the original CR4 value, read in relocate_kernel().
+	 * PAE is always set in the original CR4.
+	 */
+	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
+	movq	%r13, %cr4
+
+    /* Flush the TLB (needed?) */
+	movq	%r9, %cr3
+
+    call	swap_pages
+
+    /*
+	 * To be certain of avoiding problems with self-modifying code
+	 * I need to execute a serializing instruction here.
+	 * So I flush the TLB by reloading %cr3 here, it's handy,
+	 * and not processor dependent.
+	 */
+	movq	%cr3, %rax
+	movq	%rax, %cr3
+
+	/*
+	 * set all of the registers to known values
+	 * leave %rsp alone
+	 */
+
+	xorl	%eax, %eax
+	xorl	%ebx, %ebx
+	xorl    %ecx, %ecx
+	xorl    %edx, %edx
+	xorl    %esi, %esi
+	xorl    %edi, %edi
+	xorl    %ebp, %ebp
+	xorl	%r8d, %r8d
+	xorl	%r9d, %r9d
+	xorl	%r10d, %r10d
+	xorl	%r11d, %r11d
+	xorl	%r12d, %r12d
+	xorl	%r13d, %r13d
+	xorl	%r14d, %r14d
+	xorl	%r15d, %r15d
+
+    ret /* pops the start address pushed above and jumps to it */
+	int3
+
+.type relocate_kernel 0 ;
+.set .L__sym_size_relocate_kernel, .-relocate_kernel ;
+.size relocate_kernel, .L__sym_size_relocate_kernel
+
+/* Do the copies */
+swap_pages:
+	/*
+	 * %rdi indirection page
+	 *
+	 * Walks a list of tagged 8-byte entries, starting with the entry passed
+	 * in %rdi. The low bits of each entry select the action:
+	 *   0x1  set the destination page (%rdi)
+	 *   0x2  continue reading entries from this page (%rbx)
+	 *   0x4  done
+	 *   0x8  copy this source page (512 quadwords) to the destination
+	 * Clobbers %rbx, %rcx, %rsi and %rdi.
+	 */
+	movq	%rdi, %rcx	/* Put the indirection_page in %rcx */
+	xorl	%edi, %edi
+	xorl	%esi, %esi
+	jmp	.Lstart		/* Should start with an indirection record */
+
+.Lloop:	/* top, read another word for the indirection page */
+
+	movq	(%rbx), %rcx
+	addq	$8,	%rbx
+.Lstart:
+	testb	$0x1,	%cl   /* is it a destination page? */
+	jz	.Lnotdest
+	movq	%rcx,	%rdi
+	andq	$0xfffffffffffff000, %rdi
+	jmp	.Lloop
+.Lnotdest:
+	testb	$0x2,	%cl   /* is it an indirection page? */
+	jz	.Lnotind
+	movq	%rcx,   %rbx
+	andq	$0xfffffffffffff000, %rbx
+	jmp	.Lloop
+.Lnotind:
+	testb	$0x4,	%cl   /* is it the done indicator? */
+	jz	.Lnotdone
+	jmp	.Ldone
+.Lnotdone:
+	testb	$0x8,	%cl   /* is it the source indicator? */
+	jz	.Lloop	      /* Ignore it otherwise */
+
+	movq	%rcx,   %rsi  /* For every source page do a copy */
+	andq	$0xfffffffffffff000, %rsi
+
+	movl	$512, %ecx    /* 512 quadwords = one 4096-byte page */
+	rep ; movsq
+
+	jmp	.Lloop
+.Ldone:
+	ret
+	int3
+.type swap_pages 0 ;
+.set .L__sym_size_swap_pages, .-swap_pages ;
+.size swap_pages, .L__sym_size_swap_pages

+ 454 - 0
kernel/src/arch/x86_64/init/boot_params.rs

@@ -0,0 +1,454 @@
+use core::ffi::{c_uchar, c_uint, c_ulonglong, c_ushort};
+
+#[repr(C, packed)]
+pub struct ScreenInfo { // packed; 0x40 bytes in total, per the field offsets noted below
+    pub orig_x: c_uchar,             /* 0x00 */
+    pub orig_y: c_uchar,             /* 0x01 */
+    pub ext_mem_k: c_ushort,         /* 0x02 */
+    pub orig_video_page: c_ushort,   /* 0x04 */
+    pub orig_video_mode: c_uchar,    /* 0x06 */
+    pub orig_video_cols: c_uchar,    /* 0x07 */
+    pub flags: c_uchar,              /* 0x08 */
+    pub unused2: c_uchar,            /* 0x09 */
+    pub orig_video_ega_bx: c_ushort, /* 0x0a */
+    pub unused3: c_ushort,           /* 0x0c */
+    pub orig_video_lines: c_uchar,   /* 0x0e */
+    pub orig_video_is_vga: c_uchar,  /* 0x0f */
+    pub orig_video_points: c_ushort, /* 0x10 */
+
+    /* VESA graphic mode -- linear frame buffer */
+    pub lfb_width: c_ushort,       /* 0x12 */
+    pub lfb_height: c_ushort,      /* 0x14 */
+    pub lfb_depth: c_ushort,       /* 0x16 */
+    pub lfb_base: c_uint,          /* 0x18 */
+    pub lfb_size: c_uint,          /* 0x1c */
+    pub cl_magic: c_ushort,        /* 0x20 */
+    pub cl_offset: c_ushort,       /* 0x22 */
+    pub lfb_linelength: c_ushort,  /* 0x24 */
+    pub red_size: c_uchar,         /* 0x26 */
+    pub red_pos: c_uchar,          /* 0x27 */
+    pub green_size: c_uchar,       /* 0x28 */
+    pub green_pos: c_uchar,        /* 0x29 */
+    pub blue_size: c_uchar,        /* 0x2a */
+    pub blue_pos: c_uchar,         /* 0x2b */
+    pub rsvd_size: c_uchar,        /* 0x2c */
+    pub rsvd_pos: c_uchar,         /* 0x2d */
+    pub vesapm_seg: c_ushort,      /* 0x2e */
+    pub vesapm_off: c_ushort,      /* 0x30 */
+    pub pages: c_ushort,           /* 0x32 */
+    pub vesa_attributes: c_ushort, /* 0x34 */
+    pub capabilities: c_uint,      /* 0x36 */
+    pub ext_lfb_base: c_uint,      /* 0x3a */
+    pub _reserved: [c_uchar; 2],   /* 0x3e */
+}
+
+#[repr(C, packed)]
+pub struct ApmBiosInfo {
+    pub version: c_ushort,     /* 0x00 */
+    pub cseg: c_ushort,        /* 0x02 */
+    pub offset: c_uint,        /* 0x04 */
+    pub cseg_16: c_ushort,     /* 0x08 */
+    pub dseg: c_ushort,        /* 0x0a */
+    pub flags: c_ushort,       /* 0x0c */
+    pub cseg_len: c_ushort,    /* 0x0e */
+    pub cseg_16_len: c_ushort, /* 0x10 */
+    pub dseg_len: c_ushort,    /* 0x12 */
+}
+
+#[repr(C, packed)]
+pub struct IstInfo {
+    pub signature: c_uint,  /* 0x00 */
+    pub command: c_uint,    /* 0x04 */
+    pub event: c_uint,      /* 0x08 */
+    pub perf_level: c_uint, /* 0x0c */
+}
+
+#[repr(C, packed)]
+pub struct SysDescTable {
+    pub length: c_ushort,     /* 0x00 */
+    pub table: [c_uchar; 14], /* 0x02 */
+}
+
+#[repr(C, packed)]
+pub struct OlpcOfwHeader {
+    pub ofw_magic: c_uint,      /* OFW signature - 0x00 */
+    pub ofw_version: c_uint,    /* 0x04 */
+    pub cif_handler: c_uint,    /* callback into OFW - 0x08 */
+    pub irq_desc_table: c_uint, /* 0x0c */
+}
+
+#[repr(C, packed)]
+pub struct EdidInfo {
+    pub dummy: [u8; 128],
+}
+
+#[repr(C, packed)]
+pub struct EfiInfo {
+    pub efi_loader_signature: c_uint, /* 0x00 */
+    pub efi_systab: c_uint,           /* 0x04 */
+    pub efi_memdesc_size: c_uint,     /* 0x08 */
+    pub efi_memdesc_version: c_uint,  /* 0x0c */
+    pub efi_memmap: c_uint,           /* 0x10 */
+    pub efi_memmap_size: c_uint,      /* 0x14 */
+    pub efi_systab_hi: c_uint,        /* 0x18 */
+    pub efi_memmap_hi: c_uint,        /* 0x1c */
+}
+
+#[repr(C, packed)]
+pub struct SetupHeader {
+    pub setup_sects: c_uchar,               /* 0x00 */
+    pub root_flags: c_ushort,               /* 0x01 */
+    pub syssize: c_uint,                    /* 0x03 */
+    pub ram_size: c_ushort,                 /* 0x07 */
+    pub vid_mode: c_ushort,                 /* 0x09 */
+    pub root_dev: c_ushort,                 /* 0x0b */
+    pub boot_flag: c_ushort,                /* 0x0d */
+    pub jump: c_ushort,                     /* 0x0f */
+    pub header: c_uint,                     /* 0x11 */
+    pub version: c_ushort,                  /* 0x15 */
+    pub realmode_swtch: c_uint,             /* 0x17 */
+    pub start_sys_seg: c_ushort,            /* 0x1b */
+    pub kernel_version: c_ushort,           /* 0x1d */
+    pub type_of_loader: c_uchar,            /* 0x1f */
+    pub loadflags: c_uchar,                 /* 0x20 */
+    pub setup_move_size: c_ushort,          /* 0x21 */
+    pub code32_start: c_uint,               /* 0x23 */
+    pub ramdisk_image: c_uint,              /* 0x27 */
+    pub ramdisk_size: c_uint,               /* 0x2b */
+    pub bootsect_kludge: c_uint,            /* 0x2f */
+    pub heap_end_ptr: c_ushort,             /* 0x33 */
+    pub ext_loader_ver: c_uchar,            /* 0x35 */
+    pub ext_loader_type: c_uchar,           /* 0x36 */
+    pub cmd_line_ptr: c_uint,               /* 0x37 */
+    pub initrd_addr_max: c_uint,            /* 0x3b */
+    pub kernel_alignment: c_uint,           /* 0x3f */
+    pub relocatable_kernel: c_uchar,        /* 0x43 */
+    pub min_alignment: c_uchar,             /* 0x44 */
+    pub xloadflags: c_ushort,               /* 0x45 */
+    pub cmdline_size: c_uint,               /* 0x47 */
+    pub hardware_subarch: c_uint,           /* 0x4b */
+    pub hardware_subarch_data: c_ulonglong, /* 0x4f */
+    pub payload_offset: c_uint,             /* 0x57 */
+    pub payload_length: c_uint,             /* 0x5b */
+    pub setup_data: c_ulonglong,            /* 0x5f */
+    pub pref_address: c_ulonglong,          /* 0x67 */
+    pub init_size: c_uint,                  /* 0x6f */
+    pub handover_offset: c_uint,            /* 0x73 */
+    pub kernel_info_offset: c_uint,         /* 0x77 */
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub struct BootE820Entry { // one 20-byte element of `ArchBootParams::e820_table`
+    pub addr: u64,  /* 0x00 */
+    pub size: u64,  /* 0x08 */
+    pub type_: u32, /* 0x10 */
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub struct EddDeviceParams {
+    pub length: c_ushort,                 /* 0x00 */
+    pub info_flags: c_ushort,             /* 0x02 */
+    pub num_default_cylinders: c_uint,    /* 0x04 */
+    pub num_default_heads: c_uint,        /* 0x08 */
+    pub sectors_per_track: c_uint,        /* 0x0c */
+    pub number_of_sectors: c_ulonglong,   /* 0x10 */
+    pub bytes_per_sector: c_ushort,       /* 0x18 */
+    pub dpte_ptr: c_uint,                 /* 0x1a */
+    pub key: c_ushort,                    /* 0x1e */
+    pub device_path_info_length: c_uchar, /* 0x20 */
+    pub reserved2: c_uchar,               /* 0x21 */
+    pub reserved3: c_ushort,              /* 0x22 */
+    pub host_bus_type: [c_uchar; 4],      /* 0x24 */
+    pub interface_type: [c_uchar; 8],     /* 0x28 */
+    pub interface_path: EddInterfacePath, /* 0x30 */
+    pub device_path: EddDevicePath,       /* 0x38 */
+    pub reserved4: c_uchar,               /* 0x48 */
+    pub checksum: c_uchar,                /* 0x49 */
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub union EddInterfacePath {
+    pub isa: EddIsaPath,
+    pub pci: EddPciPath,
+    pub ibnd: EddIbndPath,
+    pub xprs: EddXprsPath,
+    pub htpt: EddHtptPath,
+    pub unknown: EddUnknownPath,
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub struct EddIsaPath {
+    pub base_address: c_ushort, /* 0x00 */
+    pub reserved1: c_ushort,    /* 0x02 */
+    pub reserved2: c_uint,      /* 0x04 */
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub struct EddPciPath {
+    pub bus: c_uchar,      /* 0x00 */
+    pub slot: c_uchar,     /* 0x01 */
+    pub function: c_uchar, /* 0x02 */
+    pub channel: c_uchar,  /* 0x03 */
+    pub reserved: c_uint,  /* 0x04 */
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub struct EddIbndPath {
+    pub reserved: c_ulonglong, /* 0x00 */
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub struct EddXprsPath {
+    pub reserved: c_ulonglong, /* 0x00 */
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub struct EddHtptPath {
+    pub reserved: c_ulonglong, /* 0x00 */
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub struct EddUnknownPath {
+    pub reserved: c_ulonglong, /* 0x00 */
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub union EddDevicePath {
+    pub ata: EddAtaPath,
+    pub atapi: EddAtapiPath,
+    pub scsi: EddScsiPath,
+    pub usb: EddUsbPath,
+    pub i1394: EddI1394Path,
+    pub fibre: EddFibrePath,
+    pub i2o: EddI2oPath,
+    pub raid: EddRaidPath,
+    pub sata: EddSataPath,
+    pub unknown: EddUnknownDevicePath,
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub struct EddAtaPath {
+    pub device: c_uchar,        /* 0x00 */
+    pub reserved1: c_uchar,     /* 0x01 */
+    pub reserved2: c_ushort,    /* 0x02 */
+    pub reserved3: c_uint,      /* 0x04 */
+    pub reserved4: c_ulonglong, /* 0x08 */
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub struct EddAtapiPath {
+    pub device: c_uchar,        /* 0x00 */
+    pub lun: c_uchar,           /* 0x01 */
+    pub reserved1: c_uchar,     /* 0x02 */
+    pub reserved2: c_uchar,     /* 0x03 */
+    pub reserved3: c_uint,      /* 0x04 */
+    pub reserved4: c_ulonglong, /* 0x08 */
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub struct EddScsiPath {
+    pub id: c_ushort,        /* 0x00 */
+    pub lun: c_ulonglong,    /* 0x02 */
+    pub reserved1: c_ushort, /* 0x0a */
+    pub reserved2: c_uint,   /* 0x0c */
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub struct EddUsbPath {
+    pub serial_number: c_ulonglong, /* 0x00 */
+    pub reserved: c_ulonglong,      /* 0x08 */
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub struct EddI1394Path {
+    pub eui: c_ulonglong,      /* 0x00 */
+    pub reserved: c_ulonglong, /* 0x08 */
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub struct EddFibrePath {
+    pub wwid: c_ulonglong, /* 0x00 */
+    pub lun: c_ulonglong,  /* 0x08 */
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub struct EddI2oPath {
+    pub identity_tag: c_ulonglong, /* 0x00 */
+    pub reserved: c_ulonglong,     /* 0x08 */
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub struct EddRaidPath {
+    pub array_number: c_uint,   /* 0x00 */
+    pub reserved1: c_uint,      /* 0x04 */
+    pub reserved2: c_ulonglong, /* 0x08 */
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub struct EddSataPath {
+    pub device: c_uchar,        /* 0x00 */
+    pub reserved1: c_uchar,     /* 0x01 */
+    pub reserved2: c_ushort,    /* 0x02 */
+    pub reserved3: c_uint,      /* 0x04 */
+    pub reserved4: c_ulonglong, /* 0x08 */
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub struct EddUnknownDevicePath {
+    pub reserved1: c_ulonglong, /* 0x00 */
+    pub reserved2: c_ulonglong, /* 0x08 */
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub struct EddInfo {
+    pub device: c_uchar,                   /* 0x00 */
+    pub version: c_uchar,                  /* 0x01 */
+    pub interface_support: c_ushort,       /* 0x02 */
+    pub legacy_max_cylinder: c_ushort,     /* 0x04 */
+    pub legacy_max_head: c_uchar,          /* 0x06 */
+    pub legacy_sectors_per_track: c_uchar, /* 0x07 */
+    pub params: EddDeviceParams,           /* 0x08 */
+}
+
+/// Laid out to match Linux's `boot_params`.
+/// https://code.dragonos.org.cn/xref/linux-6.1.9/arch/x86/include/uapi/asm/bootparam.h#185
+#[repr(C, packed)]
+pub struct ArchBootParams {
+    pub screen_info: ScreenInfo,     /* 0x000 */
+    pub apm_bios_info: ApmBiosInfo,  /* 0x040 */
+    pub _pad2: [c_uchar; 4],         /* 0x054 */
+    pub tboot_addr: c_ulonglong,     /* 0x058 */
+    pub ist_info: IstInfo,           /* 0x060 */
+    pub acpi_rsdp_addr: c_ulonglong, /* 0x070 */
+    pub _pad3: [c_uchar; 8],         /* 0x078 */
+    pub hd0_info: [c_uchar; 16],     /* obsolete! */
+    /* 0x080 */
+    pub hd1_info: [c_uchar; 16], /* obsolete! */
+    /* 0x090 */
+    pub sys_desc_table: SysDescTable, /* obsolete! */
+    /* 0x0a0 */
+    pub olpc_ofw_header: OlpcOfwHeader,   /* 0x0b0 */
+    pub ext_ramdisk_image: c_uint,        /* 0x0c0 */
+    pub ext_ramdisk_size: c_uint,         /* 0x0c4 */
+    pub ext_cmd_line_ptr: c_uint,         /* 0x0c8 */
+    pub _pad4: [c_uchar; 112],            /* 0x0cc */
+    pub cc_blob_address: c_uint,          /* 0x13c */
+    pub edid_info: EdidInfo,              /* 0x140 */
+    pub efi_info: EfiInfo,                /* 0x1c0 */
+    pub alt_mem_k: c_uint,                /* 0x1e0 */
+    pub scratch: c_uint,                  /* 0x1e4 */
+    pub e820_entries: c_uchar,            /* 0x1e8 */
+    pub eddbuf_entries: c_uchar,          /* 0x1e9 */
+    pub edd_mbr_sig_buf_entries: c_uchar, /* 0x1ea */
+    pub kbd_status: c_uchar,              /* 0x1eb */
+    pub secure_boot: c_uchar,             /* 0x1ec */
+    pub _pad5: [c_uchar; 2],              /* 0x1ed */
+    pub sentinel: c_uchar,                /* 0x1ef */
+    pub _pad6: [c_uchar; 1],              /* 0x1f0 */
+    pub hdr: SetupHeader,                 /* 0x1f1 */
+    pub _pad7: [c_uchar; 0x290 - 0x1f1 - core::mem::size_of::<SetupHeader>()], /* 0x290 - 0x1f1 - sizeof(struct setup_header) */
+    pub edd_mbr_sig_buffer: [c_uint; 16],                                      /* 0x290 */
+    pub e820_table: [BootE820Entry; 128],                                      /* 0x2d0 */
+    pub _pad8: [c_uchar; 48],                                                  /* 0xcd0 */
+    pub eddbuf: [EddInfo; 6],                                                  /* 0xd00 */
+    pub _pad9: [c_uchar; 276],                                                 /* 0xeec */
+}
+
+impl core::fmt::Debug for ArchBootParams {
+    /// Writes a fixed placeholder message; no field of the struct is formatted.
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        f.write_str("Struct ArchBootParams(x86) do not support Debug!")
+    }
+}
+
+// Fields that are non-zero when Linux boots:
+// - screen_info (leaving it zero has no effect)
+// - ist_info (leaving it zero has no effect)
+// - acpi_rsdp_addr (leaving it zero has no effect)
+// - alt_mem_k  0x7fb40
+// - scratch 0x10000d
+// - e820_entries 0x09
+// - SetupHeader (important, very important!)
+// - e820_table (its length matches e820_entries above; this is what /sys/firmware/memmap shows)
+impl ArchBootParams {
+    /// A boot parameter block with every byte set to zero.
+    // SAFETY: every field is an integer, an array of integers, or a packed
+    // struct/union built from those, so the all-zero bit pattern is a valid value.
+    pub const DEFAULT: Self =
+        unsafe { core::mem::MaybeUninit::<ArchBootParams>::zeroed().assume_init() };
+
+    /// Stores `alt_mem_k` in the field of the same name.
+    pub fn set_alt_mem_k(&mut self, alt_mem_k: u32) {
+        self.alt_mem_k = alt_mem_k;
+    }
+
+    /// Stores `scratch` in the field of the same name.
+    pub fn set_scratch(&mut self, scratch: u32) {
+        self.scratch = scratch;
+    }
+
+    /// Appends one entry to `e820_table` and increments `e820_entries`.
+    ///
+    /// The entry is written to index `e820_entries` (the first free slot) before
+    /// the counter is bumped, so `e820_table[..e820_entries]` is always exactly the
+    /// populated prefix. If the table is already full, the entry is dropped and a
+    /// warning is logged instead of indexing out of bounds.
+    pub fn add_e820_entry(&mut self, addr: u64, size: u64, mtype: u32) {
+        let idx = self.e820_entries as usize;
+        if idx >= self.e820_table.len() {
+            log::warn!("add_e820_entry: e820 table is full, dropping entry");
+            return;
+        }
+        self.e820_table[idx] = BootE820Entry {
+            addr,
+            size,
+            type_: mtype,
+        };
+        self.e820_entries += 1;
+    }
+
+    /// Fills `hdr` with hard-coded setup-header values.
+    pub fn init_setupheader(&mut self) {
+        // Anything not assigned here stays 0.
+        // The values below were captured while booting Linux in an equivalent
+        // QEMU environment (as of 2025-10-15).
+        // They should eventually be replaced with values this kernel obtains during
+        // its own initialization (some must stay hard-coded, but not all of them).
+        self.hdr.setup_sects = 0x40;
+        self.hdr.root_flags = 0xfb07;
+        self.hdr.syssize = 0x00000d00;
+        self.hdr.ram_size = 0x1000;
+        self.hdr.vid_mode = 0x09;
+        // NOTE(review): 0xaa55 is the magic the Linux boot protocol documents for
+        // `boot_flag`, yet it is written to `jump` here while `boot_flag` stays 0;
+        // confirm which field was intended.
+        self.hdr.jump = 0xaa55;
+        self.hdr.header = 0x53726448;
+        self.hdr.version = 0x020f;
+        self.hdr.start_sys_seg = 0x1000;
+        self.hdr.kernel_version = 0x42a0;
+        self.hdr.type_of_loader = 0xb0;
+        self.hdr.loadflags = 0x83;
+        self.hdr.setup_move_size = 0x8000;
+        self.hdr.code32_start = 0x10000000;
+        self.hdr.ramdisk_image = 0x00100000;
+        self.hdr.ramdisk_size = 0x1eee6000;
+        self.hdr.bootsect_kludge = 0x010e9eb0;
+        self.hdr.heap_end_ptr = 0xfe00;
+        self.hdr.cmd_line_ptr = 0x20000;
+        self.hdr.initrd_addr_max = 0x7fffffff;
+        self.hdr.kernel_alignment = 0x00200000;
+        self.hdr.relocatable_kernel = 0x1;
+        self.hdr.min_alignment = 0x15;
+        self.hdr.xloadflags = 0x007f;
+        self.hdr.cmdline_size = 0x7ff;
+    }
+
+    /// Returns the raw bytes of the whole parameter block
+    /// (`size_of::<Self>()` bytes).
+    pub fn convert_to_buf(&self) -> &[u8] {
+        let base = (self as *const Self).cast::<u8>();
+        let len = core::mem::size_of::<Self>();
+        // SAFETY: `base` comes from `&self` and `len` is exactly the size of `Self`,
+        // so the range stays inside one object borrowed for the returned lifetime.
+        // The struct is `repr(C, packed)`, so it has no padding bytes.
+        unsafe { core::slice::from_raw_parts(base, len) }
+    }
+}

+ 2 - 5
kernel/src/arch/x86_64/init/mod.rs

@@ -28,12 +28,9 @@ mod boot;
 mod multiboot2;
 mod pvh;
 
-#[derive(Debug)]
-pub struct ArchBootParams {}
+mod boot_params;
 
-impl ArchBootParams {
-    pub const DEFAULT: Self = ArchBootParams {};
-}
+pub use self::boot_params::ArchBootParams;
 
 extern "C" {
     static mut GDT_Table: [usize; 0usize];

+ 39 - 0
kernel/src/arch/x86_64/init/multiboot2.rs

@@ -211,6 +211,45 @@ impl BootCallbacks for Mb2Callback {
 
         Ok(())
     }
+
+    /// Placeholder: initramfs discovery is not implemented for multiboot2 boot.
+    ///
+    /// Logs an error and still returns `Ok(())`.
+    fn init_initramfs(&self) -> Result<(), SystemError> {
+        log::error!("x86 mb2, init_initramfs is not impled");
+        Ok(())
+    }
+
+    /// Publishes every area of the multiboot2 memory map as a sysfs memmap descriptor.
+    ///
+    /// Panics if the multiboot2 info carries no memory-map tag.
+    fn early_init_memmap_sysfs(&self) -> Result<(), SystemError> {
+        // Untested by the original author; may have problems.
+        crate::mm::sysfs::early_memmap_init();
+
+        let info = MB2_INFO.get();
+        let memory_map = info
+            .memory_map_tag()
+            .expect("MB2: Memory map tag not found!");
+
+        for (index, area) in mem_areas_iter(memory_map) {
+            // Collapse the area type to a numeric code; everything unlisted becomes 4.
+            let kind = match MemoryAreaType::from(area.typ()) {
+                MemoryAreaType::Available => 1,
+                MemoryAreaType::Reserved => 2,
+                MemoryAreaType::AcpiAvailable => 3,
+                _ => 4,
+            };
+
+            let start = area.start_address() as usize;
+            let end = (area.start_address() + area.size()) as usize;
+            let desc = crate::mm::sysfs::MemmapDesc::new(index.to_string(), start, end, kind);
+            crate::mm::sysfs::memmap_desc_manager().insert(index, desc);
+        }
+
+        return Ok(());
+
+        /// Enumerates the memory areas of a memory-map tag.
+        #[inline(always)]
+        fn mem_areas_iter<'a, T, A: 'a>(tag: &'a T) -> impl Iterator<Item = (usize, &'a A)>
+        where
+            T: MemAreas<'a, A> + ?Sized,
+        {
+            tag.areas().iter().enumerate()
+        }
+
+        /// Local adapter so the loop above reads as a single iterator expression.
+        trait MemAreas<'a, A> {
+            fn areas(&'a self) -> &'a [A];
+        }
+
+        impl<'a> MemAreas<'a, multiboot2::MemoryArea> for multiboot2::MemoryMapTag {
+            fn areas(&'a self) -> &'a [multiboot2::MemoryArea] {
+                self.memory_areas()
+            }
+        }
+    }
+
+    /// Placeholder: copying the memory map into boot_params is not implemented for multiboot2 boot.
+    ///
+    /// Logs an error and still returns `Ok(())`.
+    fn init_memmap_bp(&self) -> Result<(), SystemError> {
+        log::error!("mb2, init_memmap_bp is not impled");
+        Ok(())
+    }
 }
 
 impl Mb2Callback {

+ 74 - 0
kernel/src/arch/x86_64/init/pvh/mod.rs

@@ -55,6 +55,11 @@ impl BootCallbacks for PvhBootCallback {
         Ok(())
     }
 
+    /// Placeholder: initramfs discovery is not implemented for PVH boot.
+    ///
+    /// Logs an error and still returns `Ok(())`.
+    fn init_initramfs(&self) -> Result<(), SystemError> {
+        log::error!("x86 pvh, init_initramfs is not impled");
+        Ok(())
+    }
+
     fn early_init_framebuffer_info(
         &self,
         _scinfo: &mut BootTimeScreenInfo,
@@ -119,6 +124,75 @@ impl BootCallbacks for PvhBootCallback {
         );
         Ok(())
     }
+
+    /// Registers every entry of the PVH memory map table as a sysfs memmap descriptor.
+    ///
+    /// After calling `early_memmap_init()`, the table is only walked when
+    /// `start_info.version > 0` and `start_info.memmap_entries > 0`.
+    fn early_init_memmap_sysfs(&self) -> Result<(), SystemError> {
+        crate::mm::sysfs::early_memmap_init();
+
+        let start_info = START_INFO.get();
+        if (start_info.version > 0) && start_info.memmap_entries > 0 {
+            // Turn the physical address of the table into a pointer to its first entry.
+            let mut ep = unsafe {
+                MMArch::phys_2_virt(PhysAddr::new(start_info.memmap_paddr as usize)).unwrap()
+            }
+            .data() as *const HvmMemmapTableEntry;
+
+            for i in 0..start_info.memmap_entries {
+                // Copy the current entry out of the table.
+                let entry = unsafe { *ep };
+
+                // Collapse the E820 type to a numeric code; everything unlisted becomes 4.
+                let t = match E820Type::from(entry.type_) {
+                    E820Type::Ram => 1,
+                    E820Type::Reserved => 2,
+                    E820Type::Acpi => 3,
+                    _ => 4,
+                };
+
+                // The descriptor is named after its index and covers [addr, addr + size).
+                let memmapd = crate::mm::sysfs::MemmapDesc::new(
+                    i.to_string(),
+                    entry.addr as usize,
+                    (entry.addr + entry.size) as usize,
+                    t,
+                );
+                crate::mm::sysfs::memmap_desc_manager().insert(i as usize, memmapd);
+
+                // Advance to the next table entry.
+                ep = unsafe { ep.add(1) };
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Copies every entry of the PVH memory map table into the arch boot params
+    /// via `add_e820_entry`.
+    ///
+    /// The table is only walked when `start_info.version > 0` and
+    /// `start_info.memmap_entries > 0`.
+    fn init_memmap_bp(&self) -> Result<(), SystemError> {
+        let start_info = START_INFO.get();
+        if (start_info.version > 0) && start_info.memmap_entries > 0 {
+            // Turn the physical address of the table into a pointer to its first entry.
+            let mut ep = unsafe {
+                MMArch::phys_2_virt(PhysAddr::new(start_info.memmap_paddr as usize)).unwrap()
+            }
+            .data() as *const HvmMemmapTableEntry;
+
+            for _ in 0..start_info.memmap_entries {
+                // Copy the current entry out of the table.
+                let entry = unsafe { *ep };
+
+                // Map each E820 type to the numeric code passed to `add_e820_entry`.
+                let t = match E820Type::from(entry.type_) {
+                    E820Type::Ram => 1,
+                    E820Type::Reserved => 2,
+                    E820Type::Acpi => 3,
+                    E820Type::Nvs => 4,
+                    E820Type::Unusable => 5,
+                    E820Type::Pmem => 7,
+                    E820Type::Pram => 12,
+                    E820Type::SoftReserved => 0xefffffff,
+                    E820Type::ReservedKern => 128,
+                };
+
+                // The boot_params write lock is taken (and released) once per entry.
+                boot_params()
+                    .write_irqsave()
+                    .arch
+                    .add_e820_entry(entry.addr, entry.size, t);
+
+                // Advance to the next table entry.
+                ep = unsafe { ep.add(1) };
+            }
+        }
+        Ok(())
+    }
 }
 
 #[inline(never)]

+ 25 - 10
kernel/src/arch/x86_64/ipc/signal.rs

@@ -67,8 +67,8 @@ pub struct SigFrame {
 pub struct SigContext {
     /// sigcontext的标志位
     pub sc_flags: u64,
-    pub sc_stack: SigStack, // 信号处理程序备用栈信息
-    pub frame: TrapFrame,   // 暂存的系统调用/中断返回时,原本要弹出的内核栈帧
+    pub sc_stack: X86SigStack, // 信号处理程序备用栈信息
+    pub frame: TrapFrame,      // 暂存的系统调用/中断返回时,原本要弹出的内核栈帧
     // pub trap_num: u64,    // 用来保存线程结构体中的trap_num字段
     pub oldmask: SigSet, // 暂存的执行信号处理函数之前的,被设置block的信号
     pub cr2: u64,        // 用来保存线程结构体中的cr2字段
@@ -134,7 +134,7 @@ impl SigContext {
 /// @brief 信号处理备用栈的信息
 #[allow(dead_code)]
 #[derive(Debug, Clone, Copy)]
-pub struct SigStack {
+pub struct X86SigStack {
     pub sp: *mut c_void,
     pub flags: u32,
     pub size: u32,
@@ -457,8 +457,8 @@ fn setup_frame(
             return Err(SystemError::EINVAL);
         }
     }
-    let frame: *mut SigFrame = get_stack(trap_frame, size_of::<SigFrame>());
-
+    let frame: *mut SigFrame = get_stack(sigaction, trap_frame, size_of::<SigFrame>());
+    // debug!("frame=0x{:016x}", frame as usize);
     // 要求这个frame的地址位于用户空间,因此进行校验
     let r: Result<UserBufferWriter<'_>, SystemError> =
         UserBufferWriter::new(frame, size_of::<SigFrame>(), true);
@@ -529,16 +529,31 @@ fn setup_frame(
 }
 
 #[inline(always)]
-fn get_stack(frame: &TrapFrame, size: usize) -> *mut SigFrame {
+fn get_stack(sigaction: &mut Sigaction, frame: &TrapFrame, size: usize) -> *mut SigFrame {
     // TODO:在 linux 中会根据 Sigaction 中的一个flag 的值来确定是否使用pcb中的 signal 处理程序备用堆栈,现在的
     // pcb中也没有这个备用堆栈
 
-    // 默认使用 用户栈的栈顶指针-128字节的红区-sigframe的大小 并且16字节对齐
-    let mut rsp: usize = (frame.rsp as usize) - 128 - size;
+    // 目前对于备用栈的实现不完善, 需要补全, 来自https://code.dragonos.org.cn/xref/linux-6.1.9/arch/x86/kernel/signal.c#241
+    let mut _entering_altstack = false;
+    let binding = ProcessManager::current_pcb();
+    let stack = binding.sig_altstack();
+
+    let mut _rsp: usize = 0;
+
+    // 检查是否使用备用栈
+    if sigaction.flags().contains(SigFlags::SA_ONSTACK) {
+        // 这里还需要检查当前是否在信号栈上, 未实现
+        _rsp = stack.sp + stack.size as usize - size; // 栈指向顶部, 与 else 中一样, 需要减 size
+        _entering_altstack = true;
+    } else {
+        // 这里 else 的判断条件也没实现全, 同样未实现, 应该使用 else if
+        // 默认使用 用户栈的栈顶指针-128字节的红区-sigframe的大小 并且16字节对齐
+        _rsp = (frame.rsp as usize) - 128 - size;
+    }
     // 按照要求进行对齐,别问为什么减8,不减8就是错的,可以看
     // https://sourcegraph.com/github.com/torvalds/linux@dd72f9c7e512da377074d47d990564959b772643/-/blob/arch/x86/kernel/signal.c?L124
     // 我猜测是跟x86汇编的某些弹栈行为有关系,它可能会出于某种原因递增 rsp
-    rsp &= (!(STACK_ALIGN - 1)) as usize - 8;
+    _rsp &= (!(STACK_ALIGN - 1)) as usize - 8;
     // rsp &= (!(STACK_ALIGN - 1)) as usize;
-    return rsp as *mut SigFrame;
+    return _rsp as *mut SigFrame;
 }

+ 172 - 0
kernel/src/arch/x86_64/kexec.rs

@@ -0,0 +1,172 @@
+use crate::arch::MMArch;
+use crate::init::boot_params;
+use crate::init::kexec::Kimage;
+use crate::libs::spinlock::SpinLock;
+use crate::mm::ident_map::{ident_map_page, ident_map_pages, ident_pt_alloc};
+use crate::mm::kernel_mapper::KernelMapper;
+use crate::mm::MemoryManagementArch;
+use crate::mm::{page::EntryFlags, PhysAddr};
+use alloc::rc::Rc;
+use core::mem::transmute;
+
+type RelocateKernelFn =
+    unsafe extern "C" fn(indirection_page: usize, start_address: usize, stack_page_address: usize);
+
+/// Prepares the control page and the kernel command line for a kexec switch.
+///
+/// Copies the code between the linker symbols `__relocate_kernel_start` and
+/// `__relocate_kernel_end` into the image's control code page, then maps the page at
+/// `hdr.cmd_line_ptr` and writes a fixed command line into it.
+///
+/// Always returns `true`. Panics if the relocation code is larger than one page, or if
+/// the image has no control code page.
+pub fn machine_kexec_prepare(kimage: Rc<SpinLock<Kimage>>) -> bool {
+    unsafe {
+        unsafe extern "C" {
+            unsafe fn __relocate_kernel_start();
+            unsafe fn __relocate_kernel_end();
+        }
+        // Only the addresses of these symbols are used; they delimit the relocation code.
+        let reloc_start = __relocate_kernel_start as usize;
+        let reloc_end = __relocate_kernel_end as usize;
+
+        if reloc_end - reloc_start > MMArch::PAGE_SIZE {
+            panic!("Kexec: relocate_kernel func is bigger than PAGE_SIZE");
+        }
+
+        let control_page_phys = kimage
+            .lock()
+            .control_code_page
+            .clone()
+            .unwrap()
+            .phys_address();
+        let virt = MMArch::phys_2_virt(control_page_phys).unwrap().data();
+
+        // Copy the relocation code into the control page.
+        core::ptr::copy(
+            reloc_start as *mut u8,
+            virt as *mut u8,
+            reloc_end - reloc_start,
+        );
+
+        // Copy the kernel command line.
+        // On Linux, boot_params (the zero page) gets loaded at 0x1000, which overlaps the
+        // parameter range (0x11a0) of the current x86 PVH bootloader.
+        // As on Linux, the command line is placed at a hard-coded address, 0x20000.
+        let cmdline_ptr = boot_params().read().arch.hdr.cmd_line_ptr as usize;
+        let phys = PhysAddr::new(cmdline_ptr);
+        let virt = MMArch::phys_2_virt(phys).unwrap();
+        let mut kernel_mapper = KernelMapper::lock();
+        // Map one page at the command-line address so it can be written below.
+        kernel_mapper
+            .map_phys_with_size(
+                virt,
+                phys,
+                MMArch::PAGE_SIZE,
+                EntryFlags::from_data(
+                    MMArch::ENTRY_FLAG_PRESENT
+                        | MMArch::ENTRY_FLAG_READWRITE
+                        | MMArch::ENTRY_FLAG_GLOBAL
+                        | MMArch::ENTRY_FLAG_DIRTY
+                        | MMArch::ENTRY_FLAG_ACCESSED,
+                ),
+                true,
+            )
+            .unwrap();
+        // The command-line buffer is 2048 bytes and is zeroed before being filled.
+        let slice = core::slice::from_raw_parts_mut(virt.data() as *mut u8, 2048);
+        slice.fill(0);
+        // For now a fixed, hard-coded cmdline is used. Once DragonOS's switch-over setup works
+        // well enough for Linux to finish initializing, this can be changed to DragonOS's own cmdline.
+        let mess = "console=ttyS0 earlyprintk=serial,ttyS0,115200";
+        let mut mess_buf = mess.as_bytes().to_vec();
+        // Zero-pad the message to the full buffer length so `copy_from_slice` lengths match.
+        mess_buf.resize(2048, 0);
+        slice.copy_from_slice(&mess_buf);
+    }
+    true
+}
+
+/// Builds the identity-mapped page table used while switching kernels.
+///
+/// Allocates a fresh top-level table, stores it in `kimage.pgd` and in the assembly-side
+/// variable `kexec_pa_table_page`, then maps:
+/// - every segment of the image (identity mapping),
+/// - every page in `kimage.pages` (identity mapping),
+/// - the control code page (its kernel virtual address onto its physical address),
+/// - the page holding the kernel command line (identity mapping).
+///
+/// Panics if the image has no control code page.
+pub fn init_pgtable(kimage: Rc<SpinLock<Kimage>>) {
+    let pgd = ident_pt_alloc();
+    kimage.lock().pgd = pgd;
+
+    unsafe extern "C" {
+        pub unsafe static mut kexec_pa_table_page: u64;
+    }
+
+    unsafe {
+        kexec_pa_table_page = pgd as u64;
+    }
+
+    let nr_segments = kimage.lock().nr_segments;
+
+    // mems
+    for i in 0..nr_segments {
+        let addr = kimage.lock().segment[i].mem;
+        let size = kimage.lock().segment[i].memsz;
+        // Round up so that a segment whose size is not a whole number of pages still has
+        // its trailing partial page mapped. User space currently page-aligns and rounds
+        // these values, in which case this equals the plain division.
+        let pages_nums = size.div_ceil(MMArch::PAGE_SIZE);
+        ident_map_pages(pgd, addr, addr, pages_nums);
+    }
+
+    // pages
+    // A note on this step: Linux allocates these pages with GFP_HIGHUSER, which places them
+    // in high memory (e.g. the upper 2-4G of a 4G space).
+    // See https://code.dragonos.org.cn/xref/linux-6.1.9/kernel/kexec_core.c#802
+    // Linux then maps the pfns, at https://code.dragonos.org.cn/xref/linux-6.1.9/arch/x86/kernel/machine_kexec_64.c#219
+    // Logging added by the author showed Linux mapping [0, 7ffdf000] and [100000000, 180000000],
+    // i.e. the low 2G and the high 2G (QEMU started with 4G).
+    // The [100000000, 180000000] region affects kexec (without it the kernel switch fails).
+    // DragonOS has no such fine-grained management yet (allocation does not even support
+    // flags), so the pages are simply mapped one by one for now.
+    let len = kimage.lock().pages.len();
+    for i in 0..len {
+        let page = kimage.lock().pages[i].clone();
+        let addr = page.phys_address().data();
+        ident_map_page(pgd, addr, addr);
+    }
+
+    // efi
+    // map_efi_systab()
+
+    // ACPI
+    // map_acpi_tables()
+
+    // control_page
+    let control_page_pa = kimage
+        .lock()
+        .control_code_page
+        .clone()
+        .unwrap()
+        .phys_address();
+    // Unlike the other mappings, the control page is mapped at its kernel virtual address.
+    ident_map_page(
+        pgd,
+        unsafe { MMArch::phys_2_virt(control_page_pa).unwrap().data() },
+        control_page_pa.data(),
+    );
+
+    // cmdline
+    let cmdline_ptr = boot_params().read().arch.hdr.cmd_line_ptr as usize;
+    ident_map_page(pgd, cmdline_ptr, cmdline_ptr);
+}
+
+/// Jumps into the copy of `relocate_kernel` that lives in the image's control code page.
+///
+/// The call is not expected to return; if it does, this function panics. It also panics
+/// if the image has no control code page or no stack page.
+pub fn machine_kexec(kimage: Rc<SpinLock<Kimage>>) {
+    unsafe extern "C" {
+        unsafe fn relocate_kernel();
+        unsafe fn __relocate_kernel_start();
+    }
+
+    // Kernel virtual address of the control code page.
+    let control_page_virt = unsafe {
+        MMArch::phys_2_virt(
+            kimage
+                .lock()
+                .control_code_page
+                .clone()
+                .unwrap()
+                .phys_address(),
+        )
+        .unwrap()
+        .data()
+    };
+    // Address of `relocate_kernel` inside the control page: page base plus the offset of
+    // `relocate_kernel` from `__relocate_kernel_start`.
+    let relocate_kernel_ptr: usize =
+        control_page_virt + relocate_kernel as usize - __relocate_kernel_start as usize;
+
+    let relocate_kernel_func: RelocateKernelFn = unsafe { transmute(relocate_kernel_ptr) };
+
+    // Arguments follow `RelocateKernelFn`: indirection page, start address, stack page address.
+    let arg1 = kimage.lock().head;
+    let arg2 = kimage.lock().start;
+    let arg3 = kimage
+        .lock()
+        .stack_page
+        .clone()
+        .unwrap()
+        .phys_address()
+        .data();
+
+    unsafe { relocate_kernel_func(arg1, arg2, arg3) };
+
+    panic!("Kexec should not run to here!");
+}

+ 6 - 0
kernel/src/arch/x86_64/link.lds

@@ -47,6 +47,12 @@ SECTIONS
 		_data = .;
 		*(.data)
 		*(.data.*)
+
+		. = ALIGN(0x100);
+		__relocate_kernel_start = .;
+		*(.relocate_kernel_text)
+    	*(.relocate_kernel_data)
+		__relocate_kernel_end = .;
 		
 		_edata = .;
 	}

+ 3 - 0
kernel/src/arch/x86_64/mod.rs

@@ -9,6 +9,7 @@ pub mod fpu;
 pub mod init;
 pub mod interrupt;
 pub mod ipc;
+pub mod kexec;
 pub mod kprobe;
 pub mod libs;
 pub mod mm;
@@ -51,5 +52,7 @@ pub use crate::arch::vm::x86_kvm_ops as kvm_arch_ops;
 pub use crate::arch::vm::kvm_host::vcpu::X86VcpuArch as VirtCpuArch;
 pub use crate::arch::vm::kvm_host::KvmVcpuStat as VirtCpuStat;
 
+pub use crate::arch::kexec as KexecArch;
+
 pub fn panic_pre_work() {}
 pub fn panic_post_work() {}

+ 62 - 0
kernel/src/filesystem/ramfs/mod.rs

@@ -59,6 +59,8 @@ pub struct RamFSInode {
     self_ref: Weak<LockedRamFSInode>,
     /// 子Inode的B树
     children: BTreeMap<DName, Arc<LockedRamFSInode>>,
+    /// 子Syms的B树, 记录符号到符号的链接
+    syms: BTreeMap<DName, DName>,
     /// 当前inode的数据部分
     data: Vec<u8>,
     /// 当前inode的元数据
@@ -77,6 +79,7 @@ impl RamFSInode {
             parent: Weak::default(),
             self_ref: Weak::default(),
             children: BTreeMap::new(),
+            syms: BTreeMap::new(),
             data: Vec::new(),
             metadata: Metadata {
                 dev_id: 0,
@@ -328,6 +331,7 @@ impl IndexNode for LockedRamFSInode {
             parent: inode.self_ref.clone(),
             self_ref: Weak::default(),
             children: BTreeMap::new(),
+            syms: BTreeMap::new(),
             data: Vec::new(),
             metadata: Metadata {
                 dev_id: 0,
@@ -416,6 +420,63 @@ impl IndexNode for LockedRamFSInode {
         return Ok(());
     }
 
+    /// Creates an entry `name1` in this directory that refers to `other`, and records
+    /// `name1 -> name2` in this directory's `syms` table.
+    ///
+    /// Errors:
+    /// - `EPERM` if `other` is not a `LockedRamFSInode`
+    /// - `ENOTDIR` if this inode is not a directory
+    /// - `EISDIR` if `other` is a directory
+    /// - `EEXIST` if this directory already has a child named `name1`
+    fn symlink(
+        &self,
+        name1: &str,
+        name2: &str,
+        other: &Arc<dyn IndexNode>,
+    ) -> Result<(), SystemError> {
+        // TODO: determine whether `other` is itself reached through a symbolic link,
+        // i.e. handle test1 -> echo -> busybox, where other's name is busybox but name2 is echo.
+        let other: &LockedRamFSInode = other
+            .downcast_ref::<LockedRamFSInode>()
+            .ok_or(SystemError::EPERM)?;
+        let name = DName::from(name1);
+        let other_name = DName::from(name2);
+        // NOTE(review): both locks are taken before any check; if `other` were the same
+        // inode as `self` this would lock it twice — confirm callers never pass that.
+        let mut inode: SpinLockGuard<RamFSInode> = self.0.lock();
+        let other_locked: SpinLockGuard<RamFSInode> = other.0.lock();
+
+        // Fail if the current inode is not a directory.
+        if inode.metadata.file_type != FileType::Dir {
+            return Err(SystemError::ENOTDIR);
+        }
+
+        // Fail as well if the other inode is a directory.
+        if other_locked.metadata.file_type == FileType::Dir {
+            return Err(SystemError::EISDIR);
+        }
+
+        // Fail if this directory already contains an entry with the same name.
+        if inode.children.contains_key(&name) {
+            return Err(SystemError::EEXIST);
+        }
+
+        // Record the link name and its target name in the parent's `syms` B-tree.
+        inode.syms.insert(name.clone(), other_name.clone());
+
+        // The new child entry refers to the target inode itself.
+        inode
+            .children
+            .insert(name, other_locked.self_ref.upgrade().unwrap());
+
+        return Ok(());
+    }
+
+    /// Removing a symbolic link is not implemented here; always returns `ENOSYS`.
+    fn symunlink(&self, _name: &str) -> Result<(), SystemError> {
+        // Not supported: report "function not implemented".
+        return Err(SystemError::ENOSYS);
+    }
+
+    /// Looks `name` up in this directory's `syms` table.
+    ///
+    /// Returns the recorded target name when an entry exists, otherwise `name` itself.
+    fn get_nextsym(&self, name: &str) -> Result<String, SystemError> {
+        let key = DName::from(name);
+        let guard: SpinLockGuard<RamFSInode> = self.0.lock();
+
+        let resolved = match guard.syms.get(&key) {
+            Some(target) => target.0.to_string(),
+            None => key.to_string(),
+        };
+
+        Ok(resolved)
+    }
+
     fn rmdir(&self, name: &str) -> Result<(), SystemError> {
         let name = DName::from(name);
         let mut inode: SpinLockGuard<RamFSInode> = self.0.lock();
@@ -600,6 +661,7 @@ impl IndexNode for LockedRamFSInode {
             parent: inode.self_ref.clone(),
             self_ref: Weak::default(),
             children: BTreeMap::new(),
+            syms: BTreeMap::new(),
             data: Vec::new(),
             metadata: Metadata {
                 dev_id: 0,

+ 17 - 0
kernel/src/filesystem/vfs/file.rs

@@ -5,6 +5,7 @@ use log::error;
 use system_error::SystemError;
 
 use super::{FileType, IndexNode, InodeId, Metadata, SpecialNodeData};
+use crate::process::pid::PidPrivateData;
 use crate::{
     driver::{
         base::{block::SeekFrom, device::DevicePrivateData},
@@ -34,6 +35,8 @@ pub enum FilePrivateData {
     Tty(TtyFilePrivateData),
     /// epoll私有信息
     EPoll(EPollPrivateData),
+    /// pid私有信息
+    Pid(PidPrivateData),
     /// 不需要文件私有信息
     Unused,
 }
@@ -50,6 +53,20 @@ impl FilePrivateData {
             pdata.set_mode(mode);
         }
     }
+
+    /// Reports whether this private data is the `Pid` variant.
+    pub fn is_pid(&self) -> bool {
+        matches!(self, FilePrivateData::Pid(_))
+    }
+
+    /// Returns the pid held by the `Pid` variant, or `-1` for every other variant.
+    pub fn get_pid(&self) -> i32 {
+        match self {
+            FilePrivateData::Pid(data) => data.pid(),
+            _ => -1,
+        }
+    }
 }
 
 bitflags! {

+ 75 - 0
kernel/src/filesystem/vfs/mod.rs

@@ -8,6 +8,7 @@ pub mod syscall;
 pub mod utils;
 pub mod vcore;
 
+use self::utils::rsplit_path;
 use ::core::{any::Any, fmt::Debug, sync::atomic::AtomicUsize};
 use alloc::{string::String, sync::Arc, vec::Vec};
 use derive_builder::Builder;
@@ -81,6 +82,23 @@ impl From<FileType> for ModeType {
     }
 }
 
+impl From<ModeType> for FileType {
+    /// Derives the file type from the `S_IFMT` bits of `mode`.
+    ///
+    /// Any value that matches none of the known type bits falls back to `FileType::File`.
+    fn from(mode: ModeType) -> Self {
+        // Keep only the file-type portion of the mode.
+        let kind = mode & ModeType::S_IFMT;
+
+        if kind == ModeType::S_IFREG {
+            FileType::File
+        } else if kind == ModeType::S_IFDIR {
+            FileType::Dir
+        } else if kind == ModeType::S_IFBLK {
+            FileType::BlockDevice
+        } else if kind == ModeType::S_IFCHR {
+            FileType::CharDevice
+        } else if kind == ModeType::S_IFLNK {
+            FileType::SymLink
+        } else if kind == ModeType::S_IFSOCK {
+            FileType::Socket
+        } else if kind == ModeType::S_IFIFO {
+            FileType::Pipe
+        } else {
+            // Fallback; normally not reached because S_IFMT should cover every case.
+            FileType::File
+        }
+    }
+}
+
 #[allow(dead_code)]
 #[derive(Debug, Clone)]
 pub enum SpecialNodeData {
@@ -368,6 +386,46 @@ pub trait IndexNode: Any + Sync + Send + Debug + CastFromSync {
         return Err(SystemError::ENOSYS);
     }
 
+    /// @brief Create a symbolic (soft) link in the current directory that points to another
+    ///        IndexNode; linking to another symbolic link is supported.
+    ///
+    /// @param name1 Name of the symbolic link; an entry with this name is created in this directory
+    /// @param name2 Name of the link target; compared with the name of `other` to tell whether
+    ///              the target is itself a symbolic link
+    /// @param other Arc pointer to the IndexNode being pointed to
+    ///
+    /// @return Success: Ok(())
+    ///         Failure: Err(error code)
+    fn symlink(
+        &self,
+        _name1: &str,
+        _name2: &str,
+        _other: &Arc<dyn IndexNode>,
+    ) -> Result<(), SystemError> {
+        // Default for filesystems that do not implement this method: "not supported".
+        return Err(SystemError::ENOSYS);
+    }
+
+    /// @brief Remove the symbolic (soft) link named `name` from the current directory
+    ///
+    /// @param name Name of the symbolic link
+    ///
+    /// @return Success: Ok(())
+    ///         Failure: Err(error code)
+    fn symunlink(&self, _name: &str) -> Result<(), SystemError> {
+        // Default for filesystems that do not implement this method: "not supported".
+        return Err(SystemError::ENOSYS);
+    }
+
+    /// @brief Get the name of the file that the symbolic (soft) link named `name` in the
+    ///        current directory points to
+    ///
+    /// @param name Name of the symbolic link
+    ///
+    /// @return Success: Ok(name of the link target)
+    ///         Failure: Err(error code)
+    fn get_nextsym(&self, _name: &str) -> Result<String, SystemError> {
+        // Default for filesystems that do not implement this method: "not supported".
+        return Err(SystemError::ENOSYS);
+    }
+
     /// @brief 删除文件夹
     ///
     /// @param name 文件夹名称
@@ -1276,3 +1334,20 @@ impl<'a> FilldirContext<'a> {
         return Ok(());
     }
 }
+
+/// Resolves the link target recorded for `name`, as a step towards handling chained links
+/// such as `test -> echo`, `echo -> busybox`.
+///
+/// `name` is split into a parent path and a file name; the parent directory is looked up
+/// from the root inode of the current mount namespace (the root itself when there is no
+/// parent path) and asked for the target of the file name via `get_nextsym`.
+///
+/// Returns the name the link points to (possibly an absolute path; it is whatever was passed
+/// to `symlink`). If the returned name is the file's own name, the file is not a link.
+///
+/// # Errors
+///
+/// Propagates the error from looking up the parent directory (for example when it does
+/// not exist) and any error returned by `get_nextsym`.
+#[allow(dead_code)]
+pub fn get_link_true_file(name: String) -> Result<String, SystemError> {
+    let (filename, parent_path) = rsplit_path(&name);
+    let parent_inode = match parent_path {
+        None => ProcessManager::current_mntns().root_inode(),
+        // A failed lookup is reported to the caller instead of panicking the kernel.
+        Some(path) => ProcessManager::current_mntns()
+            .root_inode()
+            .lookup(path)?,
+    };
+    parent_inode.get_nextsym(filename)
+}

+ 18 - 0
kernel/src/filesystem/vfs/mount.rs

@@ -709,6 +709,24 @@ impl IndexNode for MountFSInode {
         return self.inner_inode.unlink(name);
     }
 
+    /// Forwards symbolic-link creation to the wrapped inner inode.
+    fn symlink(
+        &self,
+        name1: &str,
+        name2: &str,
+        other: &Arc<dyn IndexNode>,
+    ) -> Result<(), SystemError> {
+        self.inner_inode.symlink(name1, name2, other)
+    }
+
+    /// Forwards symbolic-link removal to the wrapped inner inode, like the sibling
+    /// `symlink` and `get_nextsym` wrappers.
+    ///
+    /// An inner filesystem that does not implement `symunlink` still yields `ENOSYS`
+    /// through the trait's default implementation.
+    fn symunlink(&self, name: &str) -> Result<(), SystemError> {
+        self.inner_inode.symunlink(name)
+    }
+
+    /// Forwards the link-target query to the wrapped inner inode.
+    fn get_nextsym(&self, name: &str) -> Result<String, SystemError> {
+        self.inner_inode.get_nextsym(name)
+    }
+
     #[inline]
     fn rmdir(&self, name: &str) -> Result<(), SystemError> {
         let inode_id = self.inner_inode.find(name)?.metadata()?.inode_id;

+ 1 - 0
kernel/src/filesystem/vfs/syscall/mod.rs

@@ -28,6 +28,7 @@ mod sys_ioctl;
 mod sys_linkat;
 mod sys_lseek;
 mod sys_mkdirat;
+pub mod sys_mknodat;
 mod sys_openat;
 mod sys_pread64;
 mod sys_pselect6;

+ 83 - 0
kernel/src/filesystem/vfs/syscall/sys_mknodat.rs

@@ -0,0 +1,83 @@
+use alloc::string::ToString;
+
+use super::ModeType;
+use crate::arch::interrupt::TrapFrame;
+use crate::arch::syscall::nr::SYS_MKNODAT;
+use crate::driver::base::device::device_number::DeviceNumber;
+use crate::filesystem::vfs::FileType;
+use crate::filesystem::vfs::MAX_PATHLEN;
+use crate::process::ProcessManager;
+use crate::syscall::table::FormattedSyscallParam;
+use crate::syscall::table::Syscall;
+use alloc::vec::Vec;
+use system_error::SystemError;
+
+use crate::syscall::user_access::check_and_clone_cstr;
+
+pub struct SysMknodatHandle;
+
+/// `mknodat` syscall handler: creates a node at `path` via the directory referred to by `dirfd`.
+impl Syscall for SysMknodatHandle {
+    /// Returns the number of arguments this syscall takes (4).
+    fn num_args(&self) -> usize {
+        4
+    }
+
+    /// Handles the syscall
+    ///
+    /// Arguments, in order: `dirfd`, `path` (user pointer to a C string), `mode`, `dev`.
+    ///
+    /// Errors:
+    /// - `EINVAL` if the path is not a valid string or `mode` contains unknown bits
+    /// - `EBADF` if `dirfd` is not an open file or does not refer to a directory
+    /// - whatever copying the path from user space or the inode's `mknod` returns
+    ///
+    /// Returns `Ok(0)` on success.
+    fn handle(&self, args: &[usize], _frame: &mut TrapFrame) -> Result<usize, SystemError> {
+        let dirfd = Self::dirfd(args);
+        let path = Self::path(args);
+        let mode = Self::mode(args);
+        let dev = DeviceNumber::from(Self::dev(args));
+
+        // Copy the path out of user space, bounded by MAX_PATHLEN.
+        let path = check_and_clone_cstr(path, Some(MAX_PATHLEN))?
+            .into_string()
+            .map_err(|_| SystemError::EINVAL)?;
+        let mode = ModeType::from_bits(mode).ok_or(SystemError::EINVAL)?;
+
+        // NOTE(review): `dirfd` is always resolved through the fd table, so a value such as
+        // AT_FDCWD appears to end in EBADF here — confirm whether that is intended.
+        let binding = ProcessManager::current_pcb().fd_table();
+        let fd_table_guard = binding.read();
+        let file = fd_table_guard
+            .get_file_by_fd(dirfd)
+            .ok_or(SystemError::EBADF)?;
+        // Release the fd table lock before touching the inode.
+        drop(fd_table_guard);
+
+        if file.file_type() != FileType::Dir {
+            return Err(SystemError::EBADF);
+        }
+
+        // The path is handed unchanged to the directory inode's `mknod`.
+        file.inode().mknod(&path, mode, dev)?;
+
+        Ok(0)
+    }
+
+    /// Formats the syscall arguments for display/debugging purposes.
+    fn entry_format(&self, args: &[usize]) -> Vec<FormattedSyscallParam> {
+        vec![
+            FormattedSyscallParam::new("dirfd", Self::dirfd(args).to_string()),
+            FormattedSyscallParam::new("path", format!("{:#x}", Self::path(args) as usize)),
+            FormattedSyscallParam::new("mode", Self::mode(args).to_string()),
+            FormattedSyscallParam::new("dev", Self::dev(args).to_string()),
+        ]
+    }
+}
+
+/// Argument extractors for the raw syscall argument slice.
+///
+/// Each helper indexes `args` directly and therefore panics if the slice is too short.
+impl SysMknodatHandle {
+    /// Extracts the dir descriptor (dirfd) argument from syscall parameters (`args[0]`, cast to `i32`).
+    fn dirfd(args: &[usize]) -> i32 {
+        args[0] as i32
+    }
+    /// Extracts the path argument from syscall parameters (`args[1]`, as a raw byte pointer).
+    fn path(args: &[usize]) -> *const u8 {
+        args[1] as *const u8
+    }
+    /// Extracts the mode argument from syscall parameters (`args[2]`, cast to `u32`).
+    fn mode(args: &[usize]) -> u32 {
+        args[2] as u32
+    }
+    /// Extracts the dev_t argument from syscall parameters (`args[3]`, cast to `u32`).
+    fn dev(args: &[usize]) -> u32 {
+        args[3] as u32
+    }
+}
+
+syscall_table_macros::declare_syscall!(SYS_MKNODAT, SysMknodatHandle);

+ 17 - 0
kernel/src/filesystem/vfs/vcore.rs

@@ -162,6 +162,23 @@ pub fn mount_root_fs() -> Result<(), SystemError> {
     return Ok(());
 }
 
+/// Switches the root filesystem over to the initramfs.
+///
+/// If the migration fails, an error is logged and the function spins forever;
+/// otherwise it returns `Ok(())`.
+#[cfg(feature = "initram")]
+pub fn change_root_fs() -> Result<(), SystemError> {
+    info!("Try to change root fs to initramfs...");
+    let initramfs = crate::init::initram::INIT_ROOT_INODE().fs();
+
+    if migrate_virtual_filesystem(initramfs).is_err() {
+        error!("Failed to migrate virtual filesystem to initramfs!");
+        loop {
+            spin_loop();
+        }
+    }
+
+    info!("Successfully migrate rootfs to initramfs!");
+    Ok(())
+}
+
 define_event_trace!(
     do_mkdir_at,
     TP_system(vfs),

+ 236 - 0
kernel/src/init/boot.rs

@@ -1,15 +1,42 @@
+use alloc::string::ToString;
+use alloc::sync::{Arc, Weak};
+use core::any::Any;
 use core::cmp::min;
 
 use acpi::rsdp::Rsdp;
 use alloc::string::String;
 use system_error::SystemError;
 
+use crate::driver::base::kobject::KObjectState;
+use crate::filesystem::vfs::syscall::ModeType;
+use crate::init::initcall::INITCALL_POSTCORE;
+use crate::libs::rwlock::RwLockReadGuard;
+use crate::libs::rwlock::RwLockWriteGuard;
+use crate::libs::spinlock::{SpinLock, SpinLockGuard};
 use crate::{
     arch::init::ArchBootParams,
     driver::video::fbdev::base::BootTimeScreenInfo,
+    filesystem::kernfs::KernFSInode,
+    filesystem::sysfs::{Attribute, AttributeGroup, SysFSOps, SysFSOpsSupport, SYSFS_ATTR_MODE_RO},
     libs::lazy_init::Lazy,
+    misc::ksysfs::sys_kernel_kobj,
     mm::{PhysAddr, VirtAddr},
 };
+use unified_init::macros::unified_init;
+
+use crate::driver::base::{
+    kobject::{KObjType, KObject, KObjectManager, KObjectSysFSOps, LockedKObjectState},
+    kset::KSet,
+};
+
+/// The kobject for `/sys/kernel/boot_params`. A reference is kept here so that it persists; otherwise it would be released.
+static mut SYS_KERNEL_BOOT_PARAMS_INSTANCE: Option<Arc<BootParamsSys>> = None;
+
+/// Returns a clone of the `Arc` stored in `SYS_KERNEL_BOOT_PARAMS_INSTANCE`.
+///
+/// Panics if the static is still `None`.
+#[inline(always)]
+#[allow(dead_code)]
+pub fn sys_kernel_boot_params() -> Arc<BootParamsSys> {
+    unsafe { SYS_KERNEL_BOOT_PARAMS_INSTANCE.clone().unwrap() }
+}
 
 use super::boot_params;
 #[derive(Debug)]
@@ -127,6 +154,12 @@ pub trait BootCallbacks: Send + Sync {
     ///
     /// 该函数应该把内核命令行参数追加到`boot_params().boot_cmdline`中
     fn init_kernel_cmdline(&self) -> Result<(), SystemError>;
+    /// 初始化initramfs
+    ///
+    /// 该函数会检索[外部initramfs]追加到`boot_params().initramfs`中,
+    /// [外部initramfs] 指由bootloader加载的,如qemu的 -initrd 参数
+    #[allow(dead_code)]
+    fn init_initramfs(&self) -> Result<(), SystemError>;
     /// 初始化帧缓冲区信息
     ///
     /// - 该函数应该把帧缓冲区信息写入`scinfo`中。
@@ -136,8 +169,13 @@ pub trait BootCallbacks: Send + Sync {
         scinfo: &mut BootTimeScreenInfo,
     ) -> Result<(), SystemError>;
 
+    // TODO: 下面三个应该合成一个, 都存到 arch/boot_params(linux是这样的)
     /// 初始化内存块
     fn early_init_memory_blocks(&self) -> Result<(), SystemError>;
+    /// 初始化内存 memmap 信息到 sysfs
+    fn early_init_memmap_sysfs(&self) -> Result<(), SystemError>;
+    /// 初始化内存 memmap 信息到 boot_params
+    fn init_memmap_bp(&self) -> Result<(), SystemError>;
 }
 
 static BOOT_CALLBACKS: Lazy<&'static dyn BootCallbacks> = Lazy::new();
@@ -179,3 +217,201 @@ pub enum BootloaderAcpiArg {
     /// Address of XSDT provided in RSDP v2+.
     Xsdt(Rsdp),
 }
+
+/// Initializes the sysfs directory of the boot_params module.
+///
+/// Creates the `boot_params` kobject, stores it in `SYS_KERNEL_BOOT_PARAMS_INSTANCE`,
+/// parents it under the kernel kobject and adds it to sysfs. A failure to add the kobject
+/// is only logged as a warning; the function still returns `Ok(())`.
+#[unified_init(INITCALL_POSTCORE)]
+fn bootparams_sysfs_init() -> Result<(), SystemError> {
+    let boot_params_kobj = BootParamsSys::new("boot_params".to_string());
+
+    unsafe {
+        SYS_KERNEL_BOOT_PARAMS_INSTANCE = Some(boot_params_kobj.clone());
+    }
+
+    let parent = Arc::downgrade(&(sys_kernel_kobj() as Arc<dyn KObject>));
+    boot_params_kobj.set_parent(Some(parent));
+
+    if let Err(e) = KObjectManager::add_kobj(boot_params_kobj.clone() as Arc<dyn KObject>) {
+        log::warn!("Failed to add boot_params kobject to sysfs: {:?}", e);
+    }
+
+    Ok(())
+}
+
+/// The kobject type used for the `boot_params` sysfs entry.
+#[derive(Debug)]
+pub struct BootParamsSys {
+    // Mutable kobject bookkeeping, guarded by a spinlock.
+    inner: SpinLock<BootParamsSysInner>,
+    // State of this kobject.
+    kobj_state: LockedKObjectState,
+    // Name of this kobject; fixed at construction.
+    name: String,
+}
+
+/// Lock-protected part of [`BootParamsSys`]; every field starts out as `None`.
+#[derive(Debug)]
+pub struct BootParamsSysInner {
+    // Backing kernfs inode.
+    kern_inode: Option<Arc<KernFSInode>>,
+    // The kset this kobject belongs to.
+    kset: Option<Arc<KSet>>,
+    // Weak reference to the parent kobject.
+    parent_kobj: Option<Weak<dyn KObject>>,
+}
+
+/// Unnamed attribute group exposing the `data` and `version` attributes.
+#[derive(Debug)]
+struct BootParamsAttrGroup;
+
+impl AttributeGroup for BootParamsAttrGroup {
+    /// The group has no name.
+    fn name(&self) -> Option<&str> {
+        None
+    }
+
+    /// The attributes in this group: `AttrData` and `AttrVersion`.
+    fn attrs(&self) -> &[&'static dyn Attribute] {
+        &[&AttrData, &AttrVersion]
+    }
+
+    /// Every attribute is visible with the mode it reports itself.
+    fn is_visible(
+        &self,
+        _kobj: Arc<dyn KObject>,
+        attr: &'static dyn Attribute,
+    ) -> Option<ModeType> {
+        Some(attr.mode())
+    }
+}
+
+/// The `KObjType` returned by `BootParamsSys::kobj_type`.
+#[derive(Debug)]
+pub struct BootParamsKObjType;
+
+impl KObjType for BootParamsKObjType {
+    /// Uses the generic `KObjectSysFSOps`.
+    fn sysfs_ops(&self) -> Option<&dyn SysFSOps> {
+        Some(&KObjectSysFSOps)
+    }
+
+    /// A single attribute group: `BootParamsAttrGroup`.
+    fn attribute_groups(&self) -> Option<&'static [&'static dyn AttributeGroup]> {
+        Some(&[&BootParamsAttrGroup])
+    }
+
+    /// Nothing to do on release.
+    fn release(&self, _kobj: Arc<dyn KObject>) {}
+}
+
+impl BootParamsSys {
+    /// Creates a new kobject named `name`, with no inode, kset or parent and with the
+    /// state `KObjectState::INITIALIZED`.
+    pub fn new(name: String) -> Arc<Self> {
+        Arc::new(BootParamsSys {
+            inner: SpinLock::new(BootParamsSysInner {
+                kern_inode: None,
+                kset: None,
+                parent_kobj: None,
+            }),
+            kobj_state: LockedKObjectState::new(Some(KObjectState::INITIALIZED)),
+            // `name` is owned, so it is moved in rather than cloned.
+            name,
+        })
+    }
+
+    /// Locks the inner state with `lock_irqsave` and returns the guard.
+    pub fn inner(&self) -> SpinLockGuard<'_, BootParamsSysInner> {
+        self.inner.lock_irqsave()
+    }
+}
+
+/// `KObject` implementation for [`BootParamsSys`].
+///
+/// Inode, parent and kset are stored in the spinlock-protected inner state. The kobject
+/// type and the name are fixed, so their setters are no-ops.
+impl KObject for BootParamsSys {
+    fn as_any_ref(&self) -> &dyn Any {
+        self
+    }
+
+    fn set_inode(&self, inode: Option<Arc<KernFSInode>>) {
+        self.inner().kern_inode = inode;
+    }
+
+    fn inode(&self) -> Option<Arc<KernFSInode>> {
+        self.inner().kern_inode.clone()
+    }
+
+    fn parent(&self) -> Option<Weak<dyn KObject>> {
+        self.inner().parent_kobj.clone()
+    }
+
+    fn set_parent(&self, parent: Option<Weak<dyn KObject>>) {
+        self.inner().parent_kobj = parent;
+    }
+
+    fn kset(&self) -> Option<Arc<KSet>> {
+        self.inner().kset.clone()
+    }
+
+    fn set_kset(&self, kset: Option<Arc<KSet>>) {
+        self.inner().kset = kset;
+    }
+
+    // Always `BootParamsKObjType`.
+    fn kobj_type(&self) -> Option<&'static dyn KObjType> {
+        Some(&BootParamsKObjType)
+    }
+
+    // No-op: the kobject type cannot be changed.
+    fn set_kobj_type(&self, _ktype: Option<&'static dyn KObjType>) {}
+
+    fn name(&self) -> String {
+        self.name.clone()
+    }
+
+    // No-op: the name given at construction is kept.
+    fn set_name(&self, _name: String) {}
+
+    fn kobj_state(&self) -> RwLockReadGuard<'_, KObjectState> {
+        self.kobj_state.read()
+    }
+
+    fn kobj_state_mut(&self) -> RwLockWriteGuard<'_, KObjectState> {
+        self.kobj_state.write()
+    }
+
+    fn set_kobj_state(&self, state: KObjectState) {
+        *self.kobj_state_mut() = state;
+    }
+}
+
+/// Read-only sysfs attribute `data`: the raw bytes of the arch boot params.
+#[derive(Debug)]
+struct AttrData;
+
+impl Attribute for AttrData {
+    fn name(&self) -> &str {
+        "data"
+    }
+
+    fn mode(&self) -> ModeType {
+        SYSFS_ATTR_MODE_RO
+    }
+
+    fn support(&self) -> SysFSOpsSupport {
+        SysFSOpsSupport::ATTR_SHOW
+    }
+
+    /// Copies the arch boot params into `buf` and returns the number of bytes copied.
+    ///
+    /// At most `buf.len()` bytes are written; a shorter buffer receives a truncated copy.
+    /// As a side effect the boot params are (re)filled with fixed values under the write lock.
+    fn show(&self, _kobj: Arc<dyn KObject>, buf: &mut [u8]) -> Result<usize, SystemError> {
+        let mut bp = boot_params().write();
+        // boot_params should not really be initialized through the functions below;
+        // see the comments inside those functions for details.
+        bp.arch.set_alt_mem_k(0x7fb40);
+        bp.arch.set_scratch(0x10000d);
+        bp.arch.init_setupheader();
+        let bp_buf = bp.arch.convert_to_buf();
+        let len = core::cmp::min(bp_buf.len(), buf.len());
+        buf[..len].copy_from_slice(&bp_buf[..len]);
+        // Report what was actually written, not the capacity of the caller's buffer.
+        Ok(len)
+    }
+}
+
+/// Read-only sysfs attribute `version`: the setup header version as hexadecimal text.
+#[derive(Debug)]
+struct AttrVersion;
+
+impl Attribute for AttrVersion {
+    fn name(&self) -> &str {
+        "version"
+    }
+
+    fn mode(&self) -> ModeType {
+        SYSFS_ATTR_MODE_RO
+    }
+
+    fn support(&self) -> SysFSOpsSupport {
+        SysFSOpsSupport::ATTR_SHOW
+    }
+
+    /// Writes the version formatted as `{:#x}` plus a newline into `buf` and returns the
+    /// number of bytes written.
+    ///
+    /// On x86_64 the value is `hdr.version` from the arch boot params; on other
+    /// architectures it is 0. A buffer shorter than the text receives a truncated copy.
+    fn show(&self, _kobj: Arc<dyn KObject>, buf: &mut [u8]) -> Result<usize, SystemError> {
+        #[cfg(target_arch = "x86_64")]
+        let version = boot_params().read().arch.hdr.version;
+        #[cfg(not(target_arch = "x86_64"))]
+        let version = 0;
+        let version = format!("{:#x}\n", version);
+        let len = min(version.len(), buf.len());
+        // Source and destination must have the same length, otherwise `copy_from_slice`
+        // panics; slice both sides to `len`.
+        buf[..len].copy_from_slice(&version.as_bytes()[..len]);
+        Ok(len)
+    }
+}

+ 20 - 2
kernel/src/init/initial_kthread.rs

@@ -6,6 +6,9 @@ use alloc::ffi::CString;
 use log::{debug, error};
 use system_error::SystemError;
 
+#[cfg(feature = "initram")]
+use crate::filesystem::vfs::vcore::change_root_fs;
+
 use crate::{
     arch::{interrupt::TrapFrame, process::arch_switch_to_user},
     driver::net::e1000e::e1000e::e1000e_init,
@@ -72,7 +75,14 @@ fn kernel_init() -> Result<(), SystemError> {
         .inspect_err(|e| log::error!("ahci_init failed: {:?}", e))
         .ok();
 
-    mount_root_fs().expect("Failed to mount root fs");
+    if super::enable_initramfs() {
+        // 使用 initramfs, 迁移文件系统
+        #[cfg(feature = "initram")]
+        change_root_fs().expect("Failed to mount root fs");
+    } else {
+        // 不使用 initramfs, 正常启动
+        mount_root_fs().expect("Failed to mount root fs");
+    }
 
     // WARNING: We must keep `mount_root_fs` before stdio_init,
     // because `migrate_virtual_filesystem` will change the root directory of the file system.
@@ -117,7 +127,15 @@ fn switch_to_user() -> ! {
 
     let mut trap_frame = TrapFrame::new();
 
-    if let Some(path) = kenrel_cmdline_param_manager().init_proc_path() {
+    if super::enable_initramfs() {
+        // 使用 initramfs, 启动 /init
+        log::info!("Initramfs, Boot with specified init process: /init");
+
+        try_to_run_init_process("/init", &mut proc_init_info, &None, &mut trap_frame)
+            .unwrap_or_else(|e| {
+                panic!("Failed to run specified init process: /init, err: {:?}", e)
+            });
+    } else if let Some(path) = kenrel_cmdline_param_manager().init_proc_path() {
         log::info!("Boot with specified init process: {:?}", path);
 
         try_to_run_init_process(

+ 284 - 0
kernel/src/init/initram.rs

@@ -0,0 +1,284 @@
+use alloc::string::String;
+use alloc::string::ToString;
+use alloc::sync::Arc;
+use alloc::vec::Vec;
+
+use crate::filesystem::ramfs::RamFS;
+use crate::filesystem::vfs::mount::MountFlags;
+use crate::filesystem::vfs::FileSystem;
+use crate::filesystem::vfs::MountFS;
+use crate::init::boot::boot_callbacks;
+use crate::init::initcall::INITCALL_ROOTFS;
+use crate::libs::decompress::xz_decompress;
+use crate::libs::spinlock::SpinLock;
+use crate::process::namespace::mnt::MountPropagation;
+use cpio_reader::Mode;
+use system_error::SystemError;
+use unified_init::macros::unified_init;
+
+use crate::filesystem::vfs::{syscall::ModeType, utils::rsplit_path, FileType, IndexNode};
+
+/// Root inode of the initramfs filesystem; `None` until `initramfs_init` stores it.
+static mut __INIT_ROOT_INODE: Option<Arc<dyn IndexNode>> = None;
+
+/// Whether the initramfs is used as the root filesystem.
+pub static mut __INIT_ROOT_ENABLED: bool = false;
+
+/// Returns a new handle to the global initramfs root inode.
+///
+/// Panics if the root inode has not been set.
+#[inline(always)]
+#[allow(non_snake_case)]
+pub fn INIT_ROOT_INODE() -> Arc<dyn IndexNode> {
+    unsafe { __INIT_ROOT_INODE.clone().unwrap() }
+}
+
+/// Embedded initramfs archive for x86_64.
+///
+/// Contains the bytes of `initram/x86.cpio.xz` when the `has_initram_x86` cfg is set,
+/// otherwise an empty slice.
+#[cfg(target_arch = "x86_64")]
+#[allow(non_upper_case_globals, unexpected_cfgs)]
+#[used]
+pub static INITRAM_DATA: &[u8] = {
+    #[cfg(has_initram_x86)]
+    {
+        include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/initram/x86.cpio.xz"))
+    }
+    #[cfg(not(has_initram_x86))]
+    {
+        &[]
+    }
+};
+
+/// Embedded initramfs archive for riscv64.
+///
+/// Contains the bytes of `initram/riscv64.cpio.xz` when the `has_initram_riscv64` cfg is set,
+/// otherwise an empty slice.
+#[cfg(target_arch = "riscv64")]
+#[allow(non_upper_case_globals, unexpected_cfgs)]
+#[used]
+pub static INITRAM_DATA: &[u8] = {
+    #[cfg(has_initram_riscv64)]
+    {
+        include_bytes!(concat!(
+            env!("CARGO_MANIFEST_DIR"),
+            "/initram/riscv64.cpio.xz"
+        ))
+    }
+    #[cfg(not(has_initram_riscv64))]
+    {
+        &[]
+    }
+};
+
+/// Embedded initramfs archive for loongarch64.
+///
+/// Contains the bytes of `initram/loongarch64.cpio.xz` when the `has_initram_loongarch64`
+/// cfg is set, otherwise an empty slice.
+#[cfg(target_arch = "loongarch64")]
+#[allow(non_upper_case_globals, unexpected_cfgs)]
+#[used]
+pub static INITRAM_DATA: &[u8] = {
+    #[cfg(has_initram_loongarch64)]
+    {
+        include_bytes!(concat!(
+            env!("CARGO_MANIFEST_DIR"),
+            "/initram/loongarch64.cpio.xz"
+        ))
+    }
+    #[cfg(not(has_initram_loongarch64))]
+    {
+        &[]
+    }
+};
+
+/// Returns the start address of the initramfs data embedded in the kernel.
+pub fn get_initramfs_start_addr() -> usize {
+    let data: &'static [u8] = INITRAM_DATA;
+    data.as_ptr() as usize
+}
+
+/// Returns the size in bytes of the initramfs data embedded in the kernel.
+pub fn get_initramfs_size() -> usize {
+    let data: &'static [u8] = INITRAM_DATA;
+    data.len()
+}
+
+/// Returns the initramfs data as a freshly allocated `Vec`.
+/// The embedded initramfs content is copied into the new `Vec`.
+#[allow(dead_code)]
+pub fn get_initram_data() -> Vec<u8> {
+    Vec::from(INITRAM_DATA)
+}
+
+/// Returns a reference to the initramfs data embedded in the kernel (no copy).
+pub fn get_initram() -> &'static [u8] {
+    let data: &'static [u8] = INITRAM_DATA;
+    data
+}
+
+/// Owned copy of one cpio archive entry (metadata plus contents).
+///
+/// Each field is filled from the `cpio_reader` entry accessor of the same name
+/// in `initramfs_init`.
+#[derive(PartialEq, Eq, Hash, Debug, Clone)]
+struct CpioEntryInfo {
+    // Entry path inside the archive.
+    name: String,
+    // Entry payload; for symlink entries `initramfs_init` interprets it as the UTF-8 target name.
+    file: Vec<u8>,
+    mode: Mode,
+    uid: u32,
+    gid: u32,
+    ino: u32,
+    mtime: u64,
+    nlink: u32,
+    dev: Option<u32>,
+    devmajor: Option<u32>,
+    devminor: Option<u32>,
+    rdev: Option<u32>,
+    rdevmajor: Option<u32>,
+    rdevminor: Option<u32>,
+}
+
+/// Builds the initramfs tree in a fresh RamFS from the archive embedded in the kernel.
+///
+/// Only an embedded xz-compressed archive is supported for now; it is produced with
+/// "xz --check=crc32 --lzma2=dict=512KiB /tmp/initramfs.linux_amd64.cpio".
+/// For the supported cpio formats, see the cpio_reader crate.
+/// Reference: https://book.linuxboot.org/coreboot.u-root.systemboot/index.html
+///
+/// Returns `Err(SystemError::ENOENT)` when the embedded archive is empty.
+///
+/// NOTE(review): decompression, mode parsing, lookup, create, write and symlink failures
+/// are `unwrap()`ed here, so a malformed archive panics instead of returning an error.
+#[unified_init(INITCALL_ROOTFS)]
+#[inline(never)]
+pub fn initramfs_init() -> Result<(), SystemError> {
+    log::info!("initramfs_init start");
+
+    let ramfs = RamFS::new();
+    let mount_fs = MountFS::new(
+        ramfs,
+        None,
+        MountPropagation::new_private(),
+        None,
+        MountFlags::empty(),
+    );
+    let root_inode = mount_fs.root_inode();
+    // Publish the root inode so that INIT_ROOT_INODE() can be used below.
+    unsafe {
+        __INIT_ROOT_INODE = Some(root_inode.clone());
+    }
+
+    // In Linux the embedded initramfs always exists.
+    // Recent Linux uses build options to control whether it is included and enabled.
+    log::info!(
+        "Inner initramfs(Compressed file) start addr is {:#x}, size is {:#x}",
+        get_initramfs_start_addr(),
+        get_initramfs_size()
+    );
+
+    if get_initramfs_size() == 0 {
+        log::error!("Initramfs error: Not found initram, the size is 0!");
+        return Err(SystemError::ENOENT);
+    }
+
+    let cpio_data = xz_decompress(get_initram()).unwrap();
+
+    // Copy every archive entry into an owned record so it can be indexed later.
+    let collected_entries_vec = cpio_reader::iter_files(&cpio_data)
+        .map(|entry| CpioEntryInfo {
+            name: entry.name().to_string(),
+            file: entry.file().to_vec(),
+            mode: entry.mode(),
+            uid: entry.uid(),
+            gid: entry.gid(),
+            ino: entry.ino(),
+            mtime: entry.mtime(),
+            nlink: entry.nlink(),
+            dev: entry.dev(),
+            devmajor: entry.devmajor(),
+            devminor: entry.devminor(),
+            rdev: entry.rdev(),
+            rdevmajor: entry.rdevmajor(),
+            rdevminor: entry.rdevminor(),
+        })
+        .collect::<Vec<_>>();
+
+    // Indices (into collected_entries_vec) of symlink entries, handled in a second pass.
+    let mut links: Vec<usize> = Vec::new();
+
+    for (index, entry) in collected_entries_vec.iter().enumerate() {
+        // The x86 archive contains 4 kinds of entries: Dir, File, CharDevice, SymLink
+        let name = entry.name.clone();
+        let mode = ModeType::from_bits(entry.mode.bits()).unwrap();
+        let file_type = FileType::from(mode);
+        log::info!(
+            "Find cpio entry, Name:{}, ModeType:{:?}, FileType:{:?}",
+            name,
+            mode,
+            file_type
+        );
+        let (filename, parent_path) = rsplit_path(&name);
+        // Entries without a parent path are created directly under the initramfs root.
+        let parent_inode = match parent_path {
+            None => INIT_ROOT_INODE(),
+            Some(path) => INIT_ROOT_INODE().lookup(path).unwrap(),
+        };
+        match file_type {
+            FileType::Dir => {
+                // Insert directly; there is no data to process
+                parent_inode.create(filename, file_type, mode).unwrap();
+            }
+            FileType::File => {
+                // Insert, then write the file data
+                let inode = parent_inode.create(filename, file_type, mode).unwrap();
+                inode
+                    .write_at(
+                        0,
+                        entry.file.len(),
+                        &entry.file,
+                        SpinLock::new(crate::filesystem::vfs::FilePrivateData::Unused).lock(),
+                    )
+                    .unwrap();
+            }
+            FileType::CharDevice => {
+                // Not handled: when initramfs is used, these are migrated directly from the
+                // already-initialised root filesystem into this filesystem
+            }
+            FileType::SymLink => {
+                // Only record it for now; links are created after all Dir and File entries
+                // exist, because a link entry may be read before its target
+                links.push(index);
+            }
+            _ => {
+                panic!("FileType is not impled!");
+            }
+        };
+    }
+
+    // Handle link entries
+    // TODO: normally soft (symbolic) links must be used, but the kernel does not implement them yet
+    // Using hard links works for a single level of link indirection but breaks with multiple
+    // nested levels; a small temporary workaround is used for that
+    for i in 0..links.len() {
+        let entry = &collected_entries_vec[links[i]];
+        let name = entry.name.clone();
+        let (filename, parent_path) = rsplit_path(&name);
+        let parent_inode = match parent_path {
+            None => INIT_ROOT_INODE(),
+            Some(path) => INIT_ROOT_INODE().lookup(path).unwrap(),
+        };
+        // The entry payload is interpreted as the link target, looked up relative to the
+        // link's parent directory.
+        let other_name = String::from_utf8(entry.file.clone()).unwrap();
+        let other = parent_inode.lookup(&other_name).unwrap();
+        parent_inode.symlink(filename, &other_name, &other).unwrap();
+    }
+
+    // The code below looks for an external initramfs, e.g. one loaded with qemu's -initrd option
+    // It is located through the bootinfo passed from the BIOS, which loaded it into memory
+    // Not implemented yet
+    // Once implemented, the rootfs handling when both embedded and external initramfs exist
+    // should follow what Linux does (overlaying)
+    // https://docs.linuxkernel.org.cn/filesystems/ramfs-rootfs-initramfs.html
+    boot_callbacks()
+        .init_initramfs()
+        .inspect_err(|e| {
+            log::error!("Failed to init boot initramfs: {:?}", e);
+        })
+        .ok();
+
+    // Decide whether to boot with the initramfs as the root filesystem
+    // Criterion: whether an /init program exists, same as Linux
+    // The lookup takes links into account
+    unsafe {
+        __INIT_ROOT_ENABLED = INIT_ROOT_INODE().find("init").is_ok();
+        if !__INIT_ROOT_ENABLED {
+            // TODO: drop all resources
+            // This branch is untested; memory may not be released completely
+            let old_root_inode = __INIT_ROOT_INODE.take().unwrap();
+            drop(old_root_inode);
+            log::info!("Rootfs: will not use initramfs");
+            log::info!("initramfs_init done!");
+            return Ok(());
+        }
+    }
+
+    // Remove the dev, proc and sys directories; those of the root filesystem are migrated over later
+    // Linux does not mount these directories by default and usually leaves that to the init
+    // program, but DragonOS mounts them by default
+    INIT_ROOT_INODE()
+        .rmdir("dev")
+        .expect("initramfs: Unable to remove /dev");
+    INIT_ROOT_INODE()
+        .rmdir("proc")
+        .expect("initramfs: Unable to remove /proc");
+    INIT_ROOT_INODE()
+        .rmdir("sys")
+        .expect("initramfs: Unable to remove /sys");
+
+    log::info!("initramfs_init done!");
+    Ok(())
+}

+ 261 - 0
kernel/src/init/kexec/kexec_core.rs

@@ -0,0 +1,261 @@
+use super::{
+    kexec_segment_buf, KexecFlags, KexecSegment, Kimage, KimageEntry, IND_DESTINATION, IND_DONE,
+    IND_INDIRECTION, IND_SOURCE, KEXEC_IMAGE,
+};
+use crate::arch::mm::LockedFrameAllocator;
+use crate::arch::CurrentIrqArch;
+use crate::arch::KexecArch;
+use crate::arch::MMArch;
+use crate::exception::InterruptArch;
+use crate::libs::spinlock::SpinLock;
+use crate::mm::page::{page_manager_lock_irqsave, Page, PageFlags, PageType};
+use crate::mm::MemoryManagementArch;
+use crate::syscall::user_access::UserBufferReader;
+use alloc::rc::Rc;
+use alloc::sync::Arc;
+use alloc::vec::Vec;
+use core::cmp::min;
+use core::mem::size_of;
+use system_error::SystemError;
+
+/// Loads a new kernel image described by `ksegments` and records it as the pending kexec image.
+///
+/// ## Parameters
+/// - entry: entry address of the new image
+/// - nr_segments: number of valid segments; `0` is accepted but uninstalling is not implemented
+/// - ksegments: segment descriptors, at least `nr_segments` long
+/// - flags: raw kexec flags (currently parsed but unused)
+///
+/// ## Returns
+/// - `Ok(0)` on success
+/// - `Err(SystemError::EINVAL)` if the segment count exceeds `KEXEC_SEGMENT_MAX`
+///   or `ksegments` does not cover `nr_segments` entries
+/// - `Err(SystemError::EADV)` if the architecture preparation step fails
+/// - any error returned by `kimage_alloc_init`
+pub fn do_kexec_load(
+    entry: usize,
+    nr_segments: usize,
+    ksegments: &[KexecSegment],
+    flags: usize,
+) -> Result<usize, SystemError> {
+    let _flags = KexecFlags::from_bits_truncate(flags as u64);
+
+    if nr_segments == 0 {
+        /* Uninstall image */
+        log::warn!("kexec: nr_segments == 0, should Uninstall, not impled!");
+        return Ok(0);
+    }
+
+    // The segment table inside Kimage is a fixed-size array and the count comes from
+    // user space: reject anything that would index or copy out of bounds.
+    if nr_segments > super::KEXEC_SEGMENT_MAX
+        || ksegments.len() > super::KEXEC_SEGMENT_MAX
+        || ksegments.len() < nr_segments
+    {
+        return Err(SystemError::EINVAL);
+    }
+
+    let image = kimage_alloc_init(entry, nr_segments, ksegments, flags)?;
+
+    // Explanation of segment loading: https://zhuanlan.zhihu.com/p/105284305
+    for i in 0..nr_segments {
+        kimage_load_normal_segment(image.clone(), i);
+    }
+
+    kimage_terminate(image.clone());
+
+    KexecArch::init_pgtable(image.clone());
+
+    if !KexecArch::machine_kexec_prepare(image.clone()) {
+        return Err(SystemError::EADV);
+    }
+
+    // Publish the image so that `kernel_kexec` can boot into it later.
+    unsafe {
+        KEXEC_IMAGE = Some(image);
+    }
+
+    Ok(0)
+}
+
+/// Allocates and initialises a `Kimage` for the given entry point and segments.
+///
+/// The entry list cursor (`entry` / `last_entry`) initially points at `head`, the
+/// segment descriptors are copied into the image, and one control-code page and one
+/// stack page are allocated.
+///
+/// ## Returns
+/// - `Ok(image)` on success
+/// - `Err(SystemError::EINVAL)` if `nr_segments` or `ksegments.len()` exceeds
+///   `KEXEC_SEGMENT_MAX`
+pub fn kimage_alloc_init(
+    entry: usize,
+    nr_segments: usize,
+    ksegments: &[KexecSegment],
+    _flags: usize,
+) -> Result<Rc<SpinLock<Kimage>>, SystemError> {
+    // The copy below targets a fixed-size array; an oversized slice would panic.
+    if nr_segments > super::KEXEC_SEGMENT_MAX || ksegments.len() > super::KEXEC_SEGMENT_MAX {
+        return Err(SystemError::EINVAL);
+    }
+
+    let image = Rc::new(SpinLock::new(Kimage {
+        head: 0,
+        entry: core::ptr::null_mut(),
+        last_entry: core::ptr::null_mut(),
+        start: entry,
+        control_code_page: None,
+        stack_page: None,
+        nr_segments,
+        segment: [KexecSegment {
+            buffer: kexec_segment_buf {
+                buf: core::ptr::null_mut(),
+            },
+            bufsz: 0,
+            mem: 0,
+            memsz: 0,
+        }; super::KEXEC_SEGMENT_MAX],
+        pages: Vec::new(),
+        pgd: 0,
+    }));
+
+    {
+        let mut locked_image = image.lock();
+        // `head` lives inside the Rc allocation, so its address stays valid for as long
+        // as the image is alive.
+        let head_ptr = &mut locked_image.head as *mut KimageEntry;
+        locked_image.entry = head_ptr;
+        locked_image.last_entry = head_ptr;
+        locked_image.segment[..ksegments.len()].copy_from_slice(ksegments);
+    }
+
+    // `kimage_alloc_pages` takes the image lock itself, so the guard above must be
+    // released before calling it and the result is stored in a separate statement.
+    let control_code_page = kimage_alloc_pages(image.clone(), 0, false);
+    image.lock().control_code_page = control_code_page;
+
+    let stack_page = kimage_alloc_pages(image.clone(), 0, true);
+    image.lock().stack_page = stack_page;
+
+    Ok(image)
+}
+
+/// Allocates one page for `kimage` that does not fall inside any destination segment.
+///
+/// Pages that `check_isdst` reports as destination pages are set aside and removed from
+/// the page manager once a usable page has been found. When `store` is true the
+/// returned page is also recorded in `kimage.pages`.
+///
+/// Panics if the page manager fails to create a page.
+pub fn kimage_alloc_pages(
+    kimage: Rc<SpinLock<Kimage>>,
+    order: usize,
+    store: bool,
+) -> Option<Arc<Page>> {
+    let mut rejected: Vec<Arc<Page>> = Vec::new();
+    let mut page_manager = page_manager_lock_irqsave();
+
+    // Only a single page is allocated for now; `order` is meant to control the count later.
+    let _count = 1 << order;
+
+    let accepted = loop {
+        let candidate = page_manager
+            .create_one_page(
+                PageType::Normal,
+                PageFlags::PG_RESERVED | PageFlags::PG_PRIVATE,
+                &mut LockedFrameAllocator,
+            )
+            .unwrap();
+
+        if check_isdst(kimage.clone(), candidate.clone()) {
+            // Keep the page allocated until the search is over so it is not handed out again.
+            rejected.push(candidate);
+        } else {
+            if store {
+                kimage.lock().pages.push(candidate.clone());
+            }
+            break candidate;
+        }
+    };
+
+    for unusable in rejected {
+        page_manager.remove_page(&unusable.phys_address());
+    }
+
+    Some(accepted)
+}
+
+/// Reports whether `page` lies inside the destination range of any loaded segment.
+///
+/// For each of the first `nr_segments` segments the checked range starts one page
+/// before `seg.mem` and spans `seg.memsz` bytes from there (both ends inclusive).
+/// The arithmetic saturates, so user-supplied `mem` / `memsz` values cannot
+/// underflow or overflow.
+pub fn check_isdst(kimage: Rc<SpinLock<Kimage>>, page: Arc<Page>) -> bool {
+    let paddr = page.phys_address().data();
+    let image = kimage.lock();
+
+    let is_destination = image.segment.iter().take(image.nr_segments).any(|seg| {
+        let start = seg.mem.saturating_sub(MMArch::PAGE_SIZE);
+        let end = start.saturating_add(seg.memsz);
+        (start..=end).contains(&paddr)
+    });
+
+    is_destination
+}
+
+/// Boots into the image previously stored in `KEXEC_IMAGE`.
+///
+/// Does nothing when no image has been loaded. Otherwise interrupts are disabled and
+/// control is handed to the architecture-specific `machine_kexec`.
+pub fn kernel_kexec() {
+    unsafe {
+        if let Some(image) = KEXEC_IMAGE.as_ref() {
+            CurrentIrqArch::interrupt_disable();
+
+            // TODO: add more setup here, as Linux does
+
+            KexecArch::machine_kexec(image.clone());
+        }
+    }
+}
+
+/// Appends `entry` to the image's entry list.
+///
+/// `kimage.entry` is the write cursor and `kimage.last_entry` marks the last slot of the
+/// current entry page. When the cursor reaches that slot, a new indirection page is
+/// allocated and linked in through an `IND_INDIRECTION` entry. After writing, the slot
+/// following `entry` is set to 0.
+///
+/// Panics if the indirection page cannot be allocated or mapped.
+pub fn kimage_add_entry(kimage: Rc<SpinLock<Kimage>>, entry: KimageEntry) {
+    unsafe {
+        // If the slot under the cursor is already in use, step past it.
+        if *kimage.lock().entry != 0 {
+            let t = kimage.lock().entry.add(1);
+            kimage.lock().entry = t;
+        }
+
+        let k_entry = kimage.lock().entry;
+        let k_last_entry = kimage.lock().last_entry;
+        if k_entry == k_last_entry {
+            // The current entry page is full: chain a fresh indirection page.
+            let page = kimage_alloc_pages(kimage.clone(), 0, true).unwrap();
+
+            let ind_page =
+                MMArch::phys_2_virt(page.phys_address()).unwrap().data() as *mut KimageEntry;
+
+            let page_phys_usize = page.phys_address().data() | IND_INDIRECTION;
+
+            // Store the link (physical address | flag) in the last slot, then continue
+            // writing at the start of the new page.
+            *kimage.lock().entry = page_phys_usize;
+            kimage.lock().entry = ind_page;
+            // The last slot of the new page is reserved for the next link.
+            let ind_page = ind_page.add((MMArch::PAGE_SIZE / size_of::<KimageEntry>()) - 1);
+            kimage.lock().last_entry = ind_page;
+        }
+
+        // Write the entry, advance the cursor and zero the slot after it.
+        *kimage.lock().entry = entry;
+        let t = kimage.lock().entry.add(1);
+        kimage.lock().entry = t;
+        *kimage.lock().entry = 0;
+    }
+}
+
+/// Adds an `IND_DESTINATION` entry for `destination`, masked with `MMArch::PAGE_MASK`.
+pub fn kimage_set_destination(kimage: Rc<SpinLock<Kimage>>, destination: usize) {
+    let entry = (destination & MMArch::PAGE_MASK) | IND_DESTINATION;
+    kimage_add_entry(kimage, entry);
+}
+
+/// Adds an `IND_SOURCE` entry for the address `page`, masked with `MMArch::PAGE_MASK`.
+pub fn kimage_add_page(kimage: Rc<SpinLock<Kimage>>, page: usize) {
+    let entry = (page & MMArch::PAGE_MASK) | IND_SOURCE;
+    kimage_add_entry(kimage, entry);
+}
+
+/// Copies segment `index` of `kimage` from user memory into freshly allocated source pages.
+///
+/// A destination entry for `segment.mem` is emitted first. Then, page by page, a source
+/// page is allocated and registered in the entry list, and up to one page of user data is
+/// copied into it, until `segment.memsz` bytes have been covered. Only the first
+/// `segment.bufsz` bytes come from the user buffer.
+///
+/// Panics if a page cannot be allocated or mapped, or if the user buffer cannot be read.
+/// NOTE(review): the part of a page beyond the copied user data is not cleared here;
+/// confirm whether newly created pages are already zeroed.
+pub fn kimage_load_normal_segment(kimage: Rc<SpinLock<Kimage>>, index: usize) {
+    let segment = kimage.lock().segment[index];
+
+    // maddr/mbytes track the destination range, buf/ubytes the remaining user data.
+    let mut maddr = segment.mem;
+    let mut mbytes: isize = segment.memsz as isize;
+    let mut buf = unsafe { segment.buffer.buf } as *mut u8;
+    let mut ubytes = segment.bufsz;
+
+    kimage_set_destination(kimage.clone(), maddr);
+
+    loop {
+        let page = kimage_alloc_pages(kimage.clone(), 0, true).unwrap();
+        kimage_add_page(kimage.clone(), page.phys_address().data());
+
+        let mut virt_data = unsafe { MMArch::phys_2_virt(page.phys_address()).unwrap().data() };
+        // Keep the same in-page offset as the destination address.
+        virt_data += maddr & !(MMArch::PAGE_MASK);
+        // Bytes of the destination covered by this page.
+        let mchunk = min(
+            mbytes as usize,
+            MMArch::PAGE_SIZE - (maddr & !MMArch::PAGE_MASK),
+        );
+        // Bytes of user data to copy into this page.
+        let uchunk = min(ubytes, mchunk);
+
+        if uchunk != 0 {
+            let usegments_buf = UserBufferReader::new::<u8>(buf, uchunk, true).unwrap();
+            let ksegment: &[u8] = usegments_buf.read_from_user(0).unwrap();
+            unsafe { core::ptr::copy(ksegment.as_ptr(), virt_data as *mut u8, uchunk) };
+
+            ubytes -= uchunk;
+            unsafe { buf = buf.add(uchunk) };
+        }
+
+        maddr += mchunk;
+        mbytes -= mchunk as isize;
+
+        if mbytes <= 0 {
+            return;
+        }
+    }
+}
+
+/// Terminates the entry list of `kimage` by writing an `IND_DONE` entry.
+///
+/// If the slot under the cursor is already in use, the cursor first advances by one.
+pub fn kimage_terminate(kimage: Rc<SpinLock<Kimage>>) {
+    let mut image = kimage.lock();
+    unsafe {
+        if *image.entry != 0 {
+            image.entry = image.entry.add(1);
+        }
+        *image.entry = IND_DONE;
+    }
+}

+ 82 - 0
kernel/src/init/kexec/mod.rs

@@ -0,0 +1,82 @@
+pub mod kexec_core;
+pub mod syscall;
+
+use crate::libs::spinlock::SpinLock;
+use crate::mm::page::Page;
+use alloc::rc::Rc;
+use alloc::sync::Arc;
+use alloc::vec::Vec;
+use core::ffi::c_void;
+
+/// Capacity of the fixed-size segment array in `Kimage`.
+const KEXEC_SEGMENT_MAX: usize = 16;
+
+/// The currently loaded kexec image, if any.
+pub static mut KEXEC_IMAGE: Option<Rc<SpinLock<Kimage>>> = None;
+
+// Bit positions of the flags stored in a `KimageEntry`.
+const IND_DESTINATION_BIT: usize = 0;
+const IND_INDIRECTION_BIT: usize = 1;
+const IND_DONE_BIT: usize = 2;
+const IND_SOURCE_BIT: usize = 3;
+
+// Flag masks derived from the bit positions above; they are OR-ed into entry values.
+const IND_DESTINATION: usize = 1 << IND_DESTINATION_BIT;
+const IND_INDIRECTION: usize = 1 << IND_INDIRECTION_BIT;
+const IND_DONE: usize = 1 << IND_DONE_BIT;
+const IND_SOURCE: usize = 1 << IND_SOURCE_BIT;
+
+/// One slot of the image entry list: an address combined with `IND_*` flag bits.
+type KimageEntry = usize;
+
+/// Source buffer pointer of a kexec segment; both variants share the same storage.
+#[derive(Clone, Copy)]
+#[repr(C)]
+pub union kexec_segment_buf {
+    pub buf: *mut c_void,  // For user memory (user space pointer)
+    pub kbuf: *mut c_void, // For kernel memory (kernel space pointer)
+}
+
+/// Description of one kexec segment: where its data comes from and where it must be placed.
+#[derive(Clone, Copy)]
+#[repr(C)]
+pub struct KexecSegment {
+    /// This pointer can point to user memory if kexec_load() system
+    /// call is used or will point to kernel memory if
+    /// kexec_file_load() system call is used.
+    ///
+    /// Use ->buf when expecting to deal with user memory and use ->kbuf
+    /// when expecting to deal with kernel memory.
+    pub buffer: kexec_segment_buf,
+    /// Size in bytes of the source buffer.
+    pub bufsz: usize,
+    /// Destination address of the segment.
+    pub mem: usize, // unsigned long typically matches usize
+    /// Size in bytes of the destination range.
+    pub memsz: usize,
+}
+
+/// Definition of the kimage structure (incomplete), see https://code.dragonos.org.cn/xref/linux-6.1.9/include/linux/kexec.h#321
+#[repr(C)]
+pub struct Kimage {
+    /// First slot of the entry list.
+    pub head: KimageEntry,
+    /// Write cursor into the entry list.
+    pub entry: *mut KimageEntry,
+    /// Last slot of the current entry page.
+    pub last_entry: *mut KimageEntry,
+
+    /// Entry address of the new image.
+    pub start: usize,
+    pub control_code_page: Option<Arc<Page>>,
+    // stack_page
+    // This differs from Linux: Linux's control_page is a `page *` that actually refers to
+    // two pages, control_page and stack_page, and requires them to be contiguous.
+    // For Rust it is not decided yet whether a Vec should be used, so this is used for now.
+    pub stack_page: Option<Arc<Page>>,
+
+    /// Number of valid entries in `segment`.
+    pub nr_segments: usize,
+    pub segment: [KexecSegment; KEXEC_SEGMENT_MAX],
+
+    /// Pages allocated for this image with `store == true`.
+    pub pages: Vec<Arc<Page>>,
+
+    /*
+     * This is a kimage control page, as it must not overlap with either
+     * source or destination address ranges.
+     */
+    pub pgd: usize,
+}
+
+bitflags! {
+    /// Flag bits accepted in the `flags` argument of the kexec_load system call.
+    pub struct KexecFlags: u64 {
+        const KEXEC_ON_CRASH = 0x00000001;
+        const KEXEC_PRESERVE_CONTEXT = 0x00000002;
+        // Mask covering the upper 16 bits of the low 32-bit word.
+        const KEXEC_ARCH_MASK = 0xffff0000;
+    }
+}

+ 63 - 0
kernel/src/init/kexec/syscall.rs

@@ -0,0 +1,63 @@
+use super::kexec_core::do_kexec_load;
+use super::KexecSegment;
+use crate::arch::interrupt::TrapFrame;
+use crate::arch::syscall::nr::SYS_KEXEC_LOAD;
+use crate::syscall::table::{FormattedSyscallParam, Syscall};
+use crate::syscall::user_access::UserBufferReader;
+use alloc::vec::Vec;
+use system_error::SystemError;
+
+/// Handler for the `kexec_load` system call.
+pub struct SysKexecLoad;
+
+impl SysKexecLoad {
+    /// Entry address of the new image (argument 0).
+    fn entry(args: &[usize]) -> usize {
+        args[0]
+    }
+
+    /// Number of segments (argument 1).
+    fn nr_segments(args: &[usize]) -> usize {
+        args[1]
+    }
+
+    /// User-space address of the segment array (argument 2).
+    fn segments_ptr(args: &[usize]) -> usize {
+        args[2]
+    }
+
+    /// Raw kexec flags (argument 3).
+    fn flags(args: &[usize]) -> usize {
+        args[3]
+    }
+}
+
+impl Syscall for SysKexecLoad {
+    fn num_args(&self) -> usize {
+        4
+    }
+
+    /// Reads the segment array from user space and loads the image.
+    ///
+    /// Returns `Err(SystemError::EINVAL)` when `nr_segments` exceeds
+    /// `KEXEC_SEGMENT_MAX`; other errors come from the user buffer access or
+    /// from `do_kexec_load`.
+    fn handle(&self, args: &[usize], _frame: &mut TrapFrame) -> Result<usize, SystemError> {
+        let entry = Self::entry(args);
+        let nr_segments = Self::nr_segments(args);
+        let segments_ptr = Self::segments_ptr(args);
+        let flags = Self::flags(args);
+
+        // `nr_segments` is user controlled: bounding it keeps the size computation
+        // below from overflowing and keeps the later copy into the fixed-size
+        // segment array in bounds.
+        if nr_segments > super::KEXEC_SEGMENT_MAX {
+            return Err(SystemError::EINVAL);
+        }
+
+        // TODO: do some check
+
+        let usegments_buf = UserBufferReader::new::<KexecSegment>(
+            segments_ptr as *mut KexecSegment,
+            core::mem::size_of::<KexecSegment>() * nr_segments,
+            true,
+        )?;
+        let ksegments: &[KexecSegment] = usegments_buf.read_from_user(0)?;
+
+        do_kexec_load(entry, nr_segments, ksegments, flags)
+    }
+
+    fn entry_format(&self, args: &[usize]) -> Vec<FormattedSyscallParam> {
+        vec![
+            FormattedSyscallParam::new("entry", format!("{:#x}", Self::entry(args))),
+            FormattedSyscallParam::new("nr_segments", format!("{:#x}", Self::nr_segments(args))),
+            FormattedSyscallParam::new("segments_ptr", format!("{:#x}", Self::segments_ptr(args))),
+            FormattedSyscallParam::new("flags", format!("{:#x}", Self::flags(args))),
+        ]
+    }
+}
+
+syscall_table_macros::declare_syscall!(SYS_KEXEC_LOAD, SysKexecLoad);

+ 13 - 0
kernel/src/init/mod.rs

@@ -7,8 +7,12 @@ pub mod cmdline;
 pub mod init;
 pub mod initcall;
 pub mod initial_kthread;
+pub mod kexec;
 pub mod version_info;
 
+#[cfg(feature = "initram")]
+pub mod initram;
+
 /// 启动参数
 static BOOT_PARAMS: RwLock<BootParams> = RwLock::new(BootParams::new());
 
@@ -21,3 +25,12 @@ pub fn boot_params() -> &'static RwLock<BootParams> {
 fn init_intertrait() {
     intertrait::init_caster_map();
 }
+
+/// Reports whether the initramfs is used as the root filesystem.
+///
+/// With the `initram` feature this reads `initram::__INIT_ROOT_ENABLED`;
+/// without it the answer is always `false`.
+pub fn enable_initramfs() -> bool {
+    #[cfg(feature = "initram")]
+    let enabled = unsafe { self::initram::__INIT_ROOT_ENABLED };
+    #[cfg(not(feature = "initram"))]
+    let enabled = false;
+
+    enabled
+}

+ 25 - 0
kernel/src/ipc/signal_types.rs

@@ -580,6 +580,31 @@ impl SigPending {
     }
 }
 
+/// Information about the alternate stack used for signal handling.
+#[allow(dead_code)]
+#[derive(Debug, Clone, Copy)]
+pub struct SigStack {
+    pub sp: usize,
+    pub flags: u32,
+    pub size: u32,
+}
+
+impl SigStack {
+    /// Creates a description with all fields set to zero.
+    pub fn new() -> Self {
+        Self::default()
+    }
+}
+
+impl Default for SigStack {
+    fn default() -> Self {
+        SigStack {
+            sp: 0,
+            flags: 0,
+            size: 0,
+        }
+    }
+}
+
 /// @brief 进程接收到的信号的队列
 #[derive(Debug, Clone, Default)]
 pub struct SigQueue {

+ 2 - 0
kernel/src/ipc/syscall/mod.rs

@@ -1,4 +1,5 @@
 pub mod sys_kill;
+mod sys_pidfd_sendsignal;
 #[cfg(target_arch = "x86_64")]
 pub mod sys_pipe;
 pub mod sys_pipe2;
@@ -11,6 +12,7 @@ mod sys_shmctl;
 mod sys_shmdt;
 mod sys_shmget;
 mod sys_sigaction;
+mod sys_sigaltstack;
 mod sys_sigpending;
 pub mod sys_tgkill;
 pub mod sys_tkill;

+ 91 - 0
kernel/src/ipc/syscall/sys_pidfd_sendsignal.rs

@@ -0,0 +1,91 @@
+use crate::arch::ipc::signal::Signal;
+use crate::ipc::signal_types::SigCode;
+use crate::ipc::signal_types::{SigInfo, SigType};
+use alloc::string::ToString;
+use alloc::vec::Vec;
+use core::ffi::c_int;
+
+use crate::arch::interrupt::TrapFrame;
+use crate::process::RawPid;
+use crate::syscall::table::FormattedSyscallParam;
+use crate::syscall::table::Syscall;
+use crate::{arch::syscall::nr::SYS_PIDFD_SEND_SIGNAL, process::ProcessManager};
+use system_error::SystemError;
+
+/// Handler for the `pidfd_send_signal` system call (partial implementation).
+pub struct SysPidfdSendSignalHandle;
+
+impl SysPidfdSendSignalHandle {
+    /// File descriptor referring to the target process (argument 0).
+    #[inline(always)]
+    fn pidfd(args: &[usize]) -> i32 {
+        args[0] as i32
+    }
+    /// Signal number (argument 1).
+    #[inline(always)]
+    fn sig(args: &[usize]) -> c_int {
+        args[1] as c_int
+    }
+    /// User pointer to the signal info (argument 2, currently unused).
+    #[inline(always)]
+    fn siginfo(args: &[usize]) -> *mut i32 {
+        args[2] as *mut i32
+    }
+    /// Flags (argument 3, currently unused).
+    #[inline(always)]
+    fn flags(args: &[usize]) -> usize {
+        args[3]
+    }
+}
+
+impl Syscall for SysPidfdSendSignalHandle {
+    fn num_args(&self) -> usize {
+        4
+    }
+
+    /// Sends `sig` to the process referred to by `pidfd`.
+    ///
+    /// Returns `Err(SystemError::EBADF)` when `pidfd` is not an open file descriptor,
+    /// `Ok(0)` for the null signal, and otherwise the result of delivering the signal.
+    fn handle(&self, args: &[usize], _frame: &mut TrapFrame) -> Result<usize, SystemError> {
+        let pidfd = Self::pidfd(args);
+        let sig_c_int = Self::sig(args);
+        let _sig_info = Self::siginfo(args);
+        let _flags = Self::flags(args);
+
+        // TODO: support this system call completely
+        // The descriptor comes from user space, so an invalid one must be reported
+        // as an error rather than panicking the kernel.
+        let file = ProcessManager::current_pcb()
+            .fd_table()
+            .read()
+            .get_file_by_fd(pidfd)
+            .ok_or(SystemError::EBADF)?;
+
+        // Take the lock once for both the check and the read.
+        // NOTE(review): a descriptor that is not a pidfd still falls back to pid 0 here,
+        // as before; Linux reports EBADF in that case -- confirm the intended behaviour.
+        let mut pid = 0;
+        {
+            let private_data = file.private_data.lock();
+            if private_data.is_pid() {
+                pid = private_data.get_pid();
+            }
+        }
+
+        let sig = Signal::from(sig_c_int);
+        if sig == Signal::INVALID {
+            // log::warn!("Pidfd_Send_Signal: Send empty sig(0)");
+            // The signal is 0, i.e. the null signal. How other handlers treat it is unclear,
+            // but success should be returned directly here because 0 is the null signal.
+            return Ok(0);
+        }
+
+        // Should be taken from the arguments
+        let mut info = SigInfo::new(
+            sig,
+            0,
+            SigCode::User,
+            SigType::Kill(RawPid::new(pid as usize)),
+        );
+
+        sig.send_signal_info(Some(&mut info), RawPid::new(pid as usize))
+            .map(|x| x as usize)
+    }
+
+    fn entry_format(&self, args: &[usize]) -> Vec<FormattedSyscallParam> {
+        vec![
+            FormattedSyscallParam::new("pidfd", Self::pidfd(args).to_string()),
+            FormattedSyscallParam::new("sig", Self::sig(args).to_string()),
+            FormattedSyscallParam::new("siginfo", format!("{:#x}", Self::siginfo(args) as usize)),
+            FormattedSyscallParam::new("options", format!("{:#x}", Self::flags(args))),
+        ]
+    }
+}
+
+// 注册系统调用
+syscall_table_macros::declare_syscall!(SYS_PIDFD_SEND_SIGNAL, SysPidfdSendSignalHandle);

+ 100 - 0
kernel/src/ipc/syscall/sys_sigaltstack.rs

@@ -0,0 +1,100 @@
+use alloc::string::ToString;
+use alloc::vec::Vec;
+use core::ffi::{c_int, c_void};
+
+use crate::arch::interrupt::TrapFrame;
+use crate::syscall::table::FormattedSyscallParam;
+use crate::syscall::table::Syscall;
+use crate::{arch::syscall::nr::SYS_SIGALTSTACK, process::ProcessManager};
+use system_error::SystemError;
+
+use crate::syscall::user_access::{UserBufferReader, UserBufferWriter};
+
+/// Signal stack as defined in C; equivalent to the C `stack_t`.
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+pub struct StackUser {
+    /// Base address of the stack.
+    pub ss_sp: *mut c_void,
+    /// Flags.
+    pub ss_flags: c_int,
+    /// Size of the stack in bytes.
+    pub ss_size: usize,
+}
+
+impl StackUser {
+    /// Creates an empty description: null base address, no flags, zero size.
+    pub fn new() -> Self {
+        let ss_sp = core::ptr::null_mut();
+        StackUser {
+            ss_sp,
+            ss_flags: 0,
+            ss_size: 0,
+        }
+    }
+}
+
+/// Handler for the `sigaltstack` system call (not fully implemented yet).
+pub struct SysAltStackHandle;
+
+impl SysAltStackHandle {
+    /// First argument: user pointer to the new stack description.
+    #[inline(always)]
+    fn ss(args: &[usize]) -> *const StackUser {
+        args[0] as *const StackUser
+    }
+    /// Second argument: user pointer that receives the current stack description.
+    #[inline(always)]
+    fn old_ss(args: &[usize]) -> *mut StackUser {
+        args[1] as *mut StackUser
+    }
+}
+
+impl Syscall for SysAltStackHandle {
+    fn num_args(&self) -> usize {
+        2
+    }
+
+    /// Reports the current alternate stack through `old_ss` (if non-null), then
+    /// installs the description read from `ss` (if non-null).
+    fn handle(&self, args: &[usize], _frame: &mut TrapFrame) -> Result<usize, SystemError> {
+        // SYS_SIGALTSTACK has not yet been fully realized and still needs to be supplemented.
+
+        let new_ptr = Self::ss(args);
+        let old_ptr = Self::old_ss(args);
+
+        let pcb = ProcessManager::current_pcb();
+        let mut stack = pcb.sig_altstack_mut();
+
+        if !old_ptr.is_null() {
+            // Copy the description stored in the current process out to `old_ss`.
+            // ss_flags stays 0 for now; it has to be set depending on the situation.
+            let current = StackUser {
+                ss_sp: stack.sp as *mut c_void,
+                ss_flags: 0,
+                ss_size: stack.size as usize,
+            };
+
+            let mut writer = UserBufferWriter::new(old_ptr, size_of::<StackUser>(), true)?;
+            writer.copy_one_to_user(&current, 0)?;
+        }
+
+        if !new_ptr.is_null() {
+            // Store the contents of `ss` into the description of the current process.
+            let reader = UserBufferReader::new(new_ptr, size_of::<StackUser>(), true)?;
+            let requested: &[StackUser] = reader.read_from_user(0)?;
+            let requested: StackUser = requested[0];
+
+            stack.sp = requested.ss_sp as usize;
+            stack.size = requested.ss_size as u32;
+            stack.flags = requested.ss_flags as u32;
+        }
+
+        Ok(0)
+    }
+
+    fn entry_format(&self, _args: &[usize]) -> Vec<FormattedSyscallParam> {
+        let ss = FormattedSyscallParam::new("ss: ", "not impl".to_string());
+        let old_ss = FormattedSyscallParam::new("old_ss: ", "not impl".to_string());
+        vec![ss, old_ss]
+    }
+}
+
+// 注册系统调用
+syscall_table_macros::declare_syscall!(SYS_SIGALTSTACK, SysAltStackHandle);

+ 32 - 0
kernel/src/libs/decompress.rs

@@ -0,0 +1,32 @@
+use alloc::vec::Vec;
+use system_error::SystemError;
+use xz4rust::{XzDecoder, XzNextBlockResult};
+
+/// Decompresses a complete xz stream held in memory.
+///
+/// ## Parameters
+/// - compressed_data: the whole xz-compressed input
+///
+/// ## Returns
+/// - `Ok(data)` with the decompressed bytes
+/// - `Err(SystemError::EINVAL)` if the decoder reports an error or the stream
+///   ends before the decoder reaches end-of-stream
+#[allow(dead_code)]
+pub fn xz_decompress(compressed_data: &[u8]) -> Result<Vec<u8>, SystemError> {
+    let mut decompressed_data = Vec::new();
+
+    let initial_alloc_size = xz4rust::DICT_SIZE_MIN;
+    let max_alloc_size = xz4rust::DICT_SIZE_MAX;
+    let mut decoder = XzDecoder::in_heap_with_alloc_dict_size(initial_alloc_size, max_alloc_size);
+
+    // Scratch output buffer, reused for every decode step.
+    let mut temp_buffer = [0u8; 4096];
+    let mut input_position = 0usize;
+    loop {
+        match decoder.decode(&compressed_data[input_position..], &mut temp_buffer) {
+            Ok(XzNextBlockResult::NeedMoreData(input_consumed, output_produced)) => {
+                input_position += input_consumed;
+                decompressed_data.extend_from_slice(&temp_buffer[..output_produced]);
+
+                // The decoder wants more input but none is left and nothing was
+                // produced: the stream is truncated. Without this check the loop
+                // would never terminate.
+                if input_consumed == 0
+                    && output_produced == 0
+                    && input_position >= compressed_data.len()
+                {
+                    log::error!("Decompression failed: unexpected end of xz stream");
+                    return Err(SystemError::EINVAL);
+                }
+            }
+            Ok(XzNextBlockResult::EndOfStream(_, output_produced)) => {
+                decompressed_data.extend_from_slice(&temp_buffer[..output_produced]);
+                break;
+            }
+            Err(err) => {
+                // Report the failure to the caller instead of panicking here.
+                log::error!("Decompression failed {}", err);
+                return Err(SystemError::EINVAL);
+            }
+        };
+    }
+
+    log::info!("XZ Decompress success!");
+
+    Ok(decompressed_data)
+}

+ 2 - 0
kernel/src/libs/mod.rs

@@ -28,4 +28,6 @@ pub mod wait_queue;
 pub mod font;
 pub mod name;
 
+pub mod decompress;
+
 pub mod pod;

+ 2 - 2
kernel/src/misc/reboot.rs

@@ -177,8 +177,8 @@ pub(super) fn do_sys_reboot(
             return Err(SystemError::ENOSYS);
         }
         RebootCommand::Kexec => {
-            log::warn!("do_sys_reboot: Kexec not implemented");
-            return Err(SystemError::ENOSYS);
+            crate::init::kexec::kexec_core::kernel_kexec();
+            return Ok(());
         }
     }
 }

+ 151 - 0
kernel/src/mm/ident_map.rs

@@ -0,0 +1,151 @@
+use crate::arch::mm::LockedFrameAllocator;
+use crate::arch::MMArch;
+use crate::mm::{
+    allocator::page_frame::FrameAllocator,
+    page::{EntryFlags, PageEntry, PageFlush, PageTable},
+    MemoryManagementArch, PhysAddr, VirtAddr,
+};
+use core::marker::PhantomData;
+use core::sync::atomic::compiler_fence;
+use core::sync::atomic::Ordering;
+
+/// Identity page-table mapper (paddr == vaddr).
+#[derive(Hash)]
+pub struct IdentPageMapper<Arch, F> {
+    /// Physical address of the root page table.
+    table_paddr: PhysAddr,
+    /// Frame allocator.
+    frame_allocator: F,
+    // Zero-sized marker that ties the mapper to `Arch` without storing a value of it.
+    phantom: PhantomData<fn() -> Arch>,
+}
+
+impl<Arch: MemoryManagementArch, F: FrameAllocator> IdentPageMapper<Arch, F> {
+    /// Wraps an existing root page table in a mapper.
+    ///
+    /// ## Parameters
+    /// - `table_paddr`: physical address of the root page table
+    /// - `allocator`: frame allocator stored in the mapper
+    ///
+    /// ## Returns
+    ///
+    /// The page mapper.
+    ///
+    /// ## Safety
+    ///
+    /// NOTE(review): presumably `table_paddr` must point at a valid root page table;
+    /// nothing here checks it — confirm against callers.
+    pub unsafe fn new(table_paddr: PhysAddr, allocator: F) -> Self {
+        return Self {
+            table_paddr,
+            frame_allocator: allocator,
+            phantom: PhantomData,
+        };
+    }
+
+    /// Allocates one frame, zero-fills it and uses it as the root table of a new mapper.
+    ///
+    /// ## Panics
+    ///
+    /// Panics if the frame allocation fails or the frame has no virtual mapping.
+    pub unsafe fn create(mut allocator: F) -> Self {
+        let table_paddr = allocator.allocate_one().unwrap();
+        let table_vaddr = Arch::phys_2_virt(table_paddr).unwrap();
+        Arch::write_bytes(table_vaddr, 0, Arch::PAGE_SIZE);
+        return Self::new(table_paddr, allocator);
+    }
+
+    /// Returns the physical address of the root page table.
+    pub fn paddr(&self) -> PhysAddr {
+        self.table_paddr
+    }
+
+    /// Maps one physical page at the given virtual address in the table rooted at
+    /// `table_paddr`, allocating missing intermediate tables on the way down.
+    ///
+    /// ## Parameters
+    /// - `table_paddr`: physical address of the root page table
+    /// - `virt`: page-aligned virtual address to map
+    /// - `phys`: page-aligned physical address to map to
+    /// - `allocator`: frame allocator used for intermediate page tables
+    ///
+    /// ## Returns
+    ///
+    /// `Some(PageFlush)` for the mapped address, or `None` when an address is not
+    /// page-aligned or a frame for an intermediate table could not be allocated.
+    pub unsafe fn map_phys(
+        table_paddr: PhysAddr,
+        virt: VirtAddr,
+        phys: PhysAddr,
+        mut allocator: F,
+    ) -> Option<PageFlush<Arch>> {
+        // Both the virtual and the physical address must be page-aligned.
+        if !(virt.check_aligned(Arch::PAGE_SIZE) && phys.check_aligned(Arch::PAGE_SIZE)) {
+            log::error!(
+                "Try to map unaligned page: virt={:?}, phys={:?}",
+                virt,
+                phys
+            );
+            return None;
+        }
+
+        // NOTE(review): presumably strips the sign-extension bits of the address — confirm.
+        let virt = VirtAddr::new(virt.data() & (!Arch::PAGE_NEGATIVE_MASK));
+        let flags = EntryFlags::from_data(
+            Arch::ENTRY_FLAG_PRESENT
+                | Arch::ENTRY_FLAG_READWRITE
+                | Arch::ENTRY_FLAG_EXEC
+                | Arch::ENTRY_FLAG_GLOBAL
+                | Arch::ENTRY_FLAG_DIRTY
+                | Arch::ENTRY_FLAG_ACCESSED,
+        );
+
+        // Leaf entry to install once the walk reaches level 0.
+        let entry = PageEntry::new(phys, flags);
+        let mut table = PageTable::new(VirtAddr::new(0), table_paddr, Arch::PAGE_LEVELS - 1);
+        loop {
+            let i = table.index_of(virt).unwrap();
+            assert!(i < Arch::PAGE_ENTRY_NUM);
+
+            if table.level() == 0 {
+                compiler_fence(Ordering::SeqCst);
+                table.set_entry(i, entry);
+                compiler_fence(Ordering::SeqCst);
+                return Some(PageFlush::new(virt));
+            }
+
+            if let Some(next_table) = table.next_level_table(i) {
+                table = next_table;
+                continue;
+            }
+
+            // The next-level table is missing: allocate it. Running out of frames is
+            // reported through the `Option` return value rather than a panic.
+            let Some(frame) = allocator.allocate_one() else {
+                log::error!("Failed to allocate a page table frame: virt={:?}", virt);
+                return None;
+            };
+
+            // Zero the new frame before linking it into the hierarchy.
+            Arch::write_bytes(Arch::phys_2_virt(frame).unwrap(), 0, Arch::PAGE_SIZE);
+            // Flags for an intermediate page-table entry.
+            let flags: EntryFlags<Arch> = EntryFlags::new_page_table(false);
+
+            // Link the new table into the current one, then descend into it.
+            table.set_entry(i, PageEntry::new(frame, flags));
+            table = table.next_level_table(i).unwrap();
+        }
+    }
+}
+
+/// Creates a fresh identity-mapping root page table and returns its physical
+/// address as a raw `usize`.
+pub fn ident_pt_alloc() -> usize {
+    let root_table = unsafe {
+        IdentPageMapper::<MMArch, LockedFrameAllocator>::create(LockedFrameAllocator)
+    };
+    root_table.paddr().data()
+}
+
+/// Maps a single page `virt -> phys` in the page table rooted at `table_paddr`
+/// and flushes the resulting mapping.
+///
+/// Panics if `map_phys` returns `None`.
+pub fn ident_map_page(table_paddr: usize, virt: usize, phys: usize) {
+    let root = PhysAddr::new(table_paddr);
+    let vaddr = VirtAddr::new(virt);
+    let paddr = PhysAddr::new(phys);
+    unsafe {
+        let flusher = IdentPageMapper::<MMArch, LockedFrameAllocator>::map_phys(
+            root,
+            vaddr,
+            paddr,
+            LockedFrameAllocator,
+        );
+        flusher.unwrap().flush();
+    };
+}
+
+/// Maps `nums` consecutive pages starting at `virt -> phys` in the page table
+/// rooted at `table_paddr`. Both start addresses must be page-aligned.
+pub fn ident_map_pages(table_paddr: usize, virt: usize, phys: usize, nums: usize) {
+    (0..nums)
+        .map(|page| page * MMArch::PAGE_SIZE)
+        .for_each(|offset| ident_map_page(table_paddr, virt + offset, phys + offset));
+}

+ 2 - 0
kernel/src/mm/mod.rs

@@ -23,6 +23,7 @@ use self::{
 pub mod allocator;
 pub mod early_ioremap;
 pub mod fault;
+pub mod ident_map;
 pub mod init;
 pub mod kernel_mapper;
 pub mod madvise;
@@ -33,6 +34,7 @@ pub mod no_init;
 pub mod page;
 pub mod percpu;
 pub mod syscall;
+pub mod sysfs;
 pub mod truncate;
 pub mod ucontext;
 

+ 348 - 0
kernel/src/mm/sysfs.rs

@@ -0,0 +1,348 @@
+use alloc::{string::ToString, sync::Arc};
+use unified_init::macros::unified_init;
+
+use crate::{
+    driver::base::firmware::sys_firmware_kobj,
+    driver::base::{
+        kobject::{KObjType, KObject, KObjectManager, KObjectSysFSOps},
+        kset::KSet,
+    },
+    filesystem::{
+        sysfs::{Attribute, AttributeGroup, SysFSOps, SysFSOpsSupport, SYSFS_ATTR_MODE_RO},
+        vfs::syscall::ModeType,
+    },
+    init::initcall::INITCALL_POSTCORE,
+    libs::casting::DowncastArc,
+};
+
+use crate::driver::base::kobject::CommonKobj;
+use crate::driver::base::kobject::KObjectState;
+use crate::driver::base::kobject::LockedKObjectState;
+use crate::filesystem::kernfs::KernFSInode;
+use crate::init::boot::boot_callbacks;
+use crate::libs::rwlock::RwLockReadGuard;
+use crate::libs::rwlock::RwLockWriteGuard;
+use crate::libs::spinlock::SpinLock;
+use crate::libs::spinlock::SpinLockGuard;
+use alloc::collections::btree_map;
+use alloc::collections::BTreeMap;
+use alloc::string::String;
+use alloc::sync::Weak;
+use core::any::Any;
+
+use system_error::SystemError;
+
+/// `CommonKobj` backing `/sys/firmware/memmap`.
+static mut SYS_FIRMWARE_MEMMAP_KOBJ_INSTANCE: Option<Arc<CommonKobj>> = None;
+
+/// Returns a clone of the `/sys/firmware/memmap` kobject handle.
+///
+/// Panics if the static is still `None`.
+#[inline(always)]
+#[allow(dead_code)]
+pub fn sys_firmware_memmap_kobj() -> Arc<CommonKobj> {
+    unsafe { SYS_FIRMWARE_MEMMAP_KOBJ_INSTANCE.clone().unwrap() }
+}
+
+/// A memory-map descriptor exposed as a kobject.
+#[derive(Debug)]
+pub struct MemmapDesc {
+    // Mutable state, guarded by a spinlock.
+    inner: SpinLock<MemmapDescInner>,
+    kobj_state: LockedKObjectState,
+    // Kobject name; fixed at construction.
+    name: String,
+}
+
+/// Lock-protected state of a [`MemmapDesc`].
+#[derive(Debug)]
+pub struct MemmapDescInner {
+    kern_inode: Option<Arc<KernFSInode>>,
+    kset: Option<Arc<KSet>>,
+    parent_kobj: Option<Weak<dyn KObject>>,
+    // Descriptor-specific attributes
+    pub start: usize,
+    pub end: usize,
+    pub memtype: usize,
+}
+
+impl MemmapDesc {
+    /// Creates a new descriptor.
+    ///
+    /// ## Parameters
+    /// - `name`: kobject name of the descriptor
+    /// - `s`: value stored as `start`
+    /// - `e`: value stored as `end`
+    /// - `t`: value stored as `memtype`
+    pub fn new(name: String, s: usize, e: usize, t: usize) -> Arc<Self> {
+        let md = MemmapDesc {
+            inner: SpinLock::new(MemmapDescInner {
+                kern_inode: None,
+                kset: None,
+                parent_kobj: None,
+                start: s,
+                end: e,
+                memtype: t,
+            }),
+            kobj_state: LockedKObjectState::new(Some(KObjectState::INITIALIZED)),
+            // `name` is already owned, so it is moved in rather than cloned.
+            name,
+        };
+        Arc::new(md)
+    }
+
+    /// Locks the inner state with `lock_irqsave` and returns the guard.
+    pub fn inner(&self) -> SpinLockGuard<'_, MemmapDescInner> {
+        self.inner.lock_irqsave()
+    }
+}
+
+/// Attribute group of a memmap descriptor: `start`, `end` and `type`.
+#[derive(Debug)]
+struct MemmapDescAttrGroup;
+
+impl AttributeGroup for MemmapDescAttrGroup {
+    // The group has no name of its own.
+    fn name(&self) -> Option<&str> {
+        None
+    }
+
+    fn attrs(&self) -> &[&'static dyn Attribute] {
+        &[&AttrStart, &AttrEnd, &AttrType]
+    }
+
+    // Every attribute is visible, with the mode the attribute itself reports.
+    fn is_visible(
+        &self,
+        _kobj: Arc<dyn KObject>,
+        attr: &'static dyn Attribute,
+    ) -> Option<ModeType> {
+        Some(attr.mode())
+    }
+}
+
+/// Kobject type of [`MemmapDesc`]: generic sysfs ops plus the memmap attribute group.
+#[derive(Debug)]
+pub struct MemmapDescKObjType;
+
+impl KObjType for MemmapDescKObjType {
+    fn sysfs_ops(&self) -> Option<&dyn SysFSOps> {
+        Some(&KObjectSysFSOps)
+    }
+
+    fn attribute_groups(&self) -> Option<&'static [&'static dyn AttributeGroup]> {
+        Some(&[&MemmapDescAttrGroup])
+    }
+
+    // Nothing to do on release.
+    fn release(&self, _kobj: Arc<dyn KObject>) {}
+}
+
+/// Read-only `start` attribute of a memmap descriptor.
+#[derive(Debug)]
+struct AttrStart;
+
+impl Attribute for AttrStart {
+    fn name(&self) -> &str {
+        "start"
+    }
+
+    fn mode(&self) -> ModeType {
+        SYSFS_ATTR_MODE_RO
+    }
+
+    fn support(&self) -> SysFSOpsSupport {
+        SysFSOpsSupport::ATTR_SHOW
+    }
+
+    /// Writes the descriptor's `start` value as `0x<hex>\n` into `buf`.
+    ///
+    /// Returns the number of bytes written, or `EINVAL` when `kobj` is not a
+    /// [`MemmapDesc`]. Output that does not fit into `buf` is truncated.
+    fn show(&self, kobj: Arc<dyn KObject>, buf: &mut [u8]) -> Result<usize, SystemError> {
+        let memmapd = kobj
+            .downcast_arc::<MemmapDesc>()
+            .ok_or(SystemError::EINVAL)?;
+        let start = memmapd.inner().start;
+        let start_string = format!("0x{:x}\n", start);
+        let bytes = start_string.as_bytes();
+        // Never write past the caller's buffer.
+        let len = bytes.len().min(buf.len());
+        buf[..len].copy_from_slice(&bytes[..len]);
+        Ok(len)
+    }
+}
+
+/// Read-only `end` attribute of a memmap descriptor.
+#[derive(Debug)]
+struct AttrEnd;
+
+impl Attribute for AttrEnd {
+    fn name(&self) -> &str {
+        "end"
+    }
+
+    fn mode(&self) -> ModeType {
+        SYSFS_ATTR_MODE_RO
+    }
+
+    fn support(&self) -> SysFSOpsSupport {
+        SysFSOpsSupport::ATTR_SHOW
+    }
+
+    /// Writes the descriptor's `end` value as `0x<hex>\n` into `buf`.
+    ///
+    /// Returns the number of bytes written, or `EINVAL` when `kobj` is not a
+    /// [`MemmapDesc`]. Output that does not fit into `buf` is truncated.
+    fn show(&self, kobj: Arc<dyn KObject>, buf: &mut [u8]) -> Result<usize, SystemError> {
+        let memmapd = kobj
+            .downcast_arc::<MemmapDesc>()
+            .ok_or(SystemError::EINVAL)?;
+        let end = memmapd.inner().end;
+        let end_string = format!("0x{:x}\n", end);
+        let bytes = end_string.as_bytes();
+        // Never write past the caller's buffer.
+        let len = bytes.len().min(buf.len());
+        buf[..len].copy_from_slice(&bytes[..len]);
+        Ok(len)
+    }
+}
+
+/// Read-only `type` attribute of a memmap descriptor.
+#[derive(Debug)]
+struct AttrType;
+
+impl Attribute for AttrType {
+    fn name(&self) -> &str {
+        "type"
+    }
+
+    fn mode(&self) -> ModeType {
+        SYSFS_ATTR_MODE_RO
+    }
+
+    fn support(&self) -> SysFSOpsSupport {
+        SysFSOpsSupport::ATTR_SHOW
+    }
+
+    /// Writes a textual name for the descriptor's `memtype` into `buf`.
+    ///
+    /// Returns the number of bytes written. Fails with `EINVAL` when `kobj` is not a
+    /// [`MemmapDesc`] or the type code is unknown. Output that does not fit into
+    /// `buf` is truncated.
+    fn show(&self, kobj: Arc<dyn KObject>, buf: &mut [u8]) -> Result<usize, SystemError> {
+        let memmapd = kobj
+            .downcast_arc::<MemmapDesc>()
+            .ok_or(SystemError::EINVAL)?;
+        let mt = memmapd.inner().memtype;
+        // NOTE(review): the codes look like E820 memory types — confirm.
+        let type_string = match mt {
+            1 => "System RAM\n",
+            2 => "Reserved\n",
+            3 => "ACPI Tables\n",
+            _ => {
+                log::error!("Unknown memmap type!");
+                return Err(SystemError::EINVAL);
+            }
+        };
+        let bytes = type_string.as_bytes();
+        // Never write past the caller's buffer.
+        let len = bytes.len().min(buf.len());
+        buf[..len].copy_from_slice(&bytes[..len]);
+        Ok(len)
+    }
+}
+
+// Kobject plumbing for `MemmapDesc`. The inode, parent and kset live in the
+// spinlock-protected inner state; the name and kobject type are fixed.
+impl KObject for MemmapDesc {
+    fn as_any_ref(&self) -> &dyn Any {
+        self
+    }
+
+    fn set_inode(&self, inode: Option<Arc<KernFSInode>>) {
+        self.inner().kern_inode = inode;
+    }
+
+    fn inode(&self) -> Option<Arc<KernFSInode>> {
+        self.inner().kern_inode.clone()
+    }
+
+    fn parent(&self) -> Option<Weak<dyn KObject>> {
+        self.inner().parent_kobj.clone()
+    }
+
+    fn set_parent(&self, parent: Option<Weak<dyn KObject>>) {
+        self.inner().parent_kobj = parent;
+    }
+
+    fn kset(&self) -> Option<Arc<KSet>> {
+        self.inner().kset.clone()
+    }
+
+    fn set_kset(&self, kset: Option<Arc<KSet>>) {
+        self.inner().kset = kset;
+    }
+
+    // The kobject type is always `MemmapDescKObjType`.
+    fn kobj_type(&self) -> Option<&'static dyn KObjType> {
+        Some(&MemmapDescKObjType)
+    }
+
+    // No-op: the kobject type cannot be changed.
+    fn set_kobj_type(&self, _ktype: Option<&'static dyn KObjType>) {}
+
+    fn name(&self) -> String {
+        self.name.clone()
+    }
+
+    // No-op: the name is fixed at construction.
+    fn set_name(&self, _name: String) {}
+
+    fn kobj_state(&self) -> RwLockReadGuard<'_, KObjectState> {
+        self.kobj_state.read()
+    }
+
+    fn kobj_state_mut(&self) -> RwLockWriteGuard<'_, KObjectState> {
+        self.kobj_state.write()
+    }
+
+    fn set_kobj_state(&self, state: KObjectState) {
+        *self.kobj_state_mut() = state;
+    }
+}
+
+/// Global memmap descriptor manager; `None` until it is initialized.
+static mut MEMMAP_DESC_MANAGER: Option<MemmapDescManager> = None;
+
+/// Returns a mutable reference to the global descriptor manager.
+///
+/// Panics if the static is still `None`.
+/// NOTE(review): hands out `&'static mut` from a `static mut` with no locking —
+/// confirm that callers never hold two references at once.
+#[inline(always)]
+pub fn memmap_desc_manager() -> &'static mut MemmapDescManager {
+    return unsafe { MEMMAP_DESC_MANAGER.as_mut().unwrap() };
+}
+
+/// Registry of memmap descriptors, keyed by index.
+pub struct MemmapDescManager {
+    memmap_descs: BTreeMap<usize, Arc<MemmapDesc>>,
+}
+
+impl MemmapDescManager {
+    /// Creates an empty manager.
+    fn new() -> Self {
+        MemmapDescManager {
+            memmap_descs: BTreeMap::new(),
+        }
+    }
+
+    /// Stores `desc` under `index`, replacing any descriptor already stored there.
+    pub fn insert(&mut self, index: usize, desc: Arc<MemmapDesc>) {
+        self.memmap_descs.insert(index, desc);
+    }
+
+    /// Iterates over all descriptors in ascending index order.
+    pub fn iter_descs(&self) -> btree_map::Iter<'_, usize, Arc<MemmapDesc>> {
+        self.memmap_descs.iter()
+    }
+}
+
+/// Installs an empty [`MemmapDescManager`] as the global descriptor manager.
+#[inline(never)]
+pub fn early_memmap_init() {
+    unsafe {
+        MEMMAP_DESC_MANAGER = Some(MemmapDescManager::new());
+    }
+}
+
+/// Initializes the memmap directory in sysfs.
+///
+/// Runs the boot callbacks for the memmap, creates the `memmap` kobject under the
+/// firmware kobject, and registers every known descriptor beneath it.
+#[unified_init(INITCALL_POSTCORE)]
+fn memmap_sysfs_init() -> Result<(), SystemError> {
+    // The call below should eventually move elsewhere; it does the same kind of
+    // work, so it stays here for now.
+    boot_callbacks()
+        .init_memmap_bp()
+        .expect("init bp memmap failed");
+    boot_callbacks()
+        .early_init_memmap_sysfs()
+        .expect("init sysfs memmap failed");
+
+    let memmap_kobj = CommonKobj::new("memmap".to_string());
+
+    let firm_kobj = sys_firmware_kobj();
+    memmap_kobj.set_parent(Some(Arc::downgrade(&(firm_kobj as Arc<dyn KObject>))));
+    // A failure to add the kobject is only logged; initialization continues.
+    KObjectManager::add_kobj(memmap_kobj.clone() as Arc<dyn KObject>).unwrap_or_else(|e| {
+        log::warn!("Failed to add memmap kobject to sysfs: {:?}", e);
+    });
+    unsafe {
+        SYS_FIRMWARE_MEMMAP_KOBJ_INSTANCE = Some(memmap_kobj);
+    }
+
+    // Register every memmap descriptor under /sys/firmware/memmap.
+    for (index, desc) in memmap_desc_manager().iter_descs() {
+        memmap_sysfs_add(index, desc);
+    }
+
+    return Ok(());
+}
+
+/// Adds one descriptor to sysfs as a child of the `/sys/firmware/memmap` kobject.
+///
+/// Does nothing while the parent kobject has not been created; a failed
+/// registration is only logged.
+fn memmap_sysfs_add(index: &usize, desc: &Arc<MemmapDesc>) {
+    let Some(parent) = (unsafe { SYS_FIRMWARE_MEMMAP_KOBJ_INSTANCE.clone() }) else {
+        return;
+    };
+
+    let parent_ref = Arc::downgrade(&(parent as Arc<dyn KObject>));
+    desc.set_parent(Some(parent_ref));
+
+    if let Err(e) = KObjectManager::add_kobj(desc.clone() as Arc<dyn KObject>) {
+        log::warn!("Failed to add memmap({index:?}) kobject to sysfs: {:?}", e);
+    }
+}

+ 44 - 0
kernel/src/process/exit.rs

@@ -78,6 +78,47 @@ pub fn kernel_wait4(
     return Ok(r);
 }
 
+/// Kernel-side implementation of `waitid`.
+///
+/// ## Parameters
+/// - `which`: id type selector: `0` waits for any child, `1` treats `pid` as a
+///   process id, `2` treats `pid` as a process-group id, `3` treats `pid` as a
+///   pidfd file descriptor
+/// - `pid`: the id, interpreted according to `which`
+/// - `_siginfo`: user pointer for the signal info; currently not written
+/// - `options`: wait options
+/// - `_rusage_buf`: resource-usage buffer; currently unused
+///
+/// ## Returns
+/// The result of `do_wait`, or
+/// - `EINVAL`: unknown `which`, or a process-group id that cannot be negated
+/// - `EBADF`: `which == 3` and `pid` is not an open pidfd
+/// - `ESRCH`: the id could not be converted into a wait target
+pub fn kernel_waitid(
+    which: i32,
+    pid: i32,
+    _siginfo: *mut i32,
+    options: WaitOption,
+    _rusage_buf: Option<&mut RUsage>,
+) -> Result<usize, SystemError> {
+    let pid = match which {
+        0 => -1,
+        1 => pid,
+        // `-pid` would overflow for `i32::MIN`.
+        2 => pid.checked_neg().ok_or(SystemError::EINVAL)?,
+        3 => {
+            // The descriptor comes from user space, so a bad fd must not panic.
+            let file = ProcessManager::current_pcb()
+                .fd_table()
+                .read()
+                .get_file_by_fd(pid)
+                .ok_or(SystemError::EBADF)?;
+            let private_data = file.private_data.lock();
+            if !private_data.is_pid() {
+                // Not a pidfd: do not reinterpret the fd number as a pid.
+                return Err(SystemError::EBADF);
+            }
+            private_data.get_pid()
+        }
+        _ => return Err(SystemError::EINVAL),
+    };
+
+    let converter = PidConverter::from_id(pid).ok_or(SystemError::ESRCH)?;
+    let mut kwo = KernelWaitOption::new(converter, options);
+
+    let r = do_wait(&mut kwo)?;
+
+    // TODO: write the signal information back to user space
+
+    return Ok(r);
+}
+
 /// 参考 https://code.dragonos.org.cn/xref/linux-6.1.9/kernel/exit.c#1573
 fn do_wait(kwo: &mut KernelWaitOption) -> Result<usize, SystemError> {
     let mut retval: Result<usize, SystemError>;
@@ -110,6 +151,9 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result<usize, SystemError> {
         kwo.no_task_error = Some(SystemError::ECHILD);
         match &kwo.pid_converter {
             PidConverter::Pid(pid) => {
+                if pid.pid_vnr().data() == ProcessManager::current_pcb().raw_tgid().data() {
+                    return Err(SystemError::ECHILD);
+                }
                 let child_pcb = pid
                     .pid_task(PidType::PID)
                     .ok_or(SystemError::ECHILD)

+ 38 - 0
kernel/src/process/fork.rs

@@ -1,6 +1,12 @@
 use alloc::vec::Vec;
 use core::{intrinsics::unlikely, sync::atomic::Ordering};
 
+use crate::filesystem::vfs::file::File;
+use crate::filesystem::vfs::file::FileMode;
+use crate::filesystem::vfs::file::FilePrivateData;
+use crate::filesystem::vfs::syscall::ModeType;
+use crate::filesystem::vfs::FileType;
+use crate::process::pid::PidPrivateData;
 use alloc::{string::ToString, sync::Arc};
 use log::error;
 use system_error::SystemError;
@@ -429,6 +435,38 @@ impl ProcessManager {
             writer.copy_one_to_user(&(pcb.raw_pid().0 as i32), 0)?;
         }
 
+        // 克隆 pidfd
+        if clone_flags.contains(CloneFlags::CLONE_PIDFD) {
+            let pid = pcb.raw_pid().0 as i32;
+            let root_inode = ProcessManager::current_mntns().root_inode();
+            let name = format!(
+                "Pidfd(from {} to {})",
+                ProcessManager::current_pcb().raw_pid().data(),
+                pid
+            );
+            let new_inode = root_inode
+                .create(&name, FileType::File, ModeType::from_bits_truncate(0o777))
+                .unwrap();
+            let file = File::new(new_inode, FileMode::O_RDWR | FileMode::O_CLOEXEC).unwrap();
+            {
+                let mut guard = file.private_data.lock();
+                *guard = FilePrivateData::Pid(PidPrivateData::new(pid));
+            }
+            let r = current_pcb
+                .fd_table()
+                .write()
+                .alloc_fd(file, None)
+                .map(|fd| fd as usize);
+
+            let mut writer = UserBufferWriter::new(
+                clone_args.parent_tid.data() as *mut i32,
+                core::mem::size_of::<i32>(),
+                true,
+            )?;
+
+            writer.copy_one_to_user(&(r.unwrap() as i32), 0)?;
+        }
+
         sched_fork(pcb).unwrap_or_else(|e| {
             panic!(
                 "fork: Failed to set sched info from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",

+ 17 - 1
kernel/src/process/mod.rs

@@ -36,7 +36,7 @@ use crate::{
     ipc::{
         sighand::SigHand,
         signal::RestartBlock,
-        signal_types::{SigInfo, SigPending},
+        signal_types::{SigInfo, SigPending, SigStack},
     },
     libs::{
         align::AlignedBox,
@@ -727,6 +727,8 @@ pub struct ProcessControlBlock {
     /// 与信号处理相关的信息(似乎可以是无锁的)
     sig_info: RwLock<ProcessSignalInfo>,
     sighand: RwLock<Arc<SigHand>>,
+    /// 备用信号栈
+    sig_altstack: RwLock<SigStack>,
 
     /// 退出信号S
     exit_signal: AtomicSignal,
@@ -869,6 +871,7 @@ impl ProcessControlBlock {
                 arch_info,
                 sig_info: RwLock::new(ProcessSignalInfo::default()),
                 sighand: RwLock::new(SigHand::new()),
+                sig_altstack: RwLock::new(SigStack::new()),
                 exit_signal: AtomicSignal::new(Signal::SIGCHLD),
                 parent_pcb: RwLock::new(ppcb.clone()),
                 real_parent_pcb: RwLock::new(ppcb),
@@ -1129,6 +1132,14 @@ impl ProcessControlBlock {
         return &self.sched_info;
     }
 
+    /// Takes the read lock (`read_irqsave`) on the alternate signal stack.
+    pub fn sig_altstack(&self) -> RwLockReadGuard<'_, SigStack> {
+        self.sig_altstack.read_irqsave()
+    }
+
+    /// Takes the write lock (`write_irqsave`) on the alternate signal stack.
+    pub fn sig_altstack_mut(&self) -> RwLockWriteGuard<'_, SigStack> {
+        self.sig_altstack.write_irqsave()
+    }
+
     #[inline(always)]
     pub fn worker_private(&self) -> SpinLockGuard<'_, Option<WorkerPrivate>> {
         return self.worker_private.lock();
@@ -1139,6 +1150,11 @@ impl ProcessControlBlock {
         return self.pid;
     }
 
+    /// Returns the raw thread-group id.
+    ///
+    /// NOTE(review): the value returned is the `pid` field; confirm whether a
+    /// separate thread-group id was intended here.
+    #[inline(always)]
+    pub fn raw_tgid(&self) -> RawPid {
+        return self.pid;
+    }
+
     #[inline(always)]
     pub fn fs_struct(&self) -> Arc<FsStruct> {
         self.fs.read().clone()

+ 19 - 0
kernel/src/process/pid.rs

@@ -29,6 +29,25 @@ impl PidType {
     pub const PIDTYPE_MAX: usize = PidType::MAX as usize;
 }
 
+/// Per-process pid information, typically stored as the `private_data` of a pidfd.
+///
+/// TODO: incomplete, see https://code.dragonos.org.cn/xref/linux-6.1.9/include/linux/pid.h#59
+/// TODO: every pid-related use should migrate to this struct; the kernel currently
+/// works with the raw pid passed in. For example, a `struct pid` should always be
+/// created when a process is created (e.g. by fork() or clone()).
+#[derive(Clone, Debug)]
+pub struct PidPrivateData {
+    pid: i32,
+}
+
+impl PidPrivateData {
+    /// Wraps the given raw pid.
+    pub fn new(pid: i32) -> Self {
+        PidPrivateData { pid }
+    }
+
+    /// Returns the wrapped raw pid.
+    pub fn pid(&self) -> i32 {
+        let PidPrivateData { pid } = self;
+        *pid
+    }
+}
+
 pub struct Pid {
     self_ref: Weak<Pid>,
     pub level: u32,

+ 2 - 0
kernel/src/process/syscall/mod.rs

@@ -17,6 +17,7 @@ mod sys_getsid;
 mod sys_gettid;
 mod sys_getuid;
 mod sys_groups;
+mod sys_pidfdopen;
 pub mod sys_prlimit64;
 mod sys_set_tid_address;
 mod sys_setdomainname;
@@ -32,6 +33,7 @@ mod sys_setuid;
 mod sys_uname;
 mod sys_unshare;
 mod sys_wait4;
+mod sys_waitid;
 
 #[cfg(target_arch = "x86_64")]
 mod sys_fork;

+ 16 - 0
kernel/src/process/syscall/sys_execve.rs

@@ -1,4 +1,6 @@
 use alloc::string::String;
+#[allow(unused_imports)]
+use alloc::string::ToString;
 use alloc::sync::Arc;
 
 use crate::arch::interrupt::TrapFrame;
@@ -59,8 +61,22 @@ impl SysExecve {
         envp: *const *const u8,
     ) -> Result<(CString, Vec<CString>, Vec<CString>), SystemError> {
         let path: CString = check_and_clone_cstr(path, Some(MAX_PATHLEN))?;
+        #[cfg(not(feature = "initram"))]
         let argv: Vec<CString> = check_and_clone_cstr_array(argv)?;
+        #[cfg(feature = "initram")]
+        let mut argv: Vec<CString> = check_and_clone_cstr_array(argv)?;
         let envp: Vec<CString> = check_and_clone_cstr_array(envp)?;
+
+        // 这里需要处理符号链接, 目前内核没有完整实现, 这里是个简易的替代
+        // 例如执行/bin/echo, 必须拿到echo这个名字, 目前内核只有硬链接, 会执行/bin/busybox, 导致无法识别命令
+        #[cfg(feature = "initram")]
+        {
+            let real =
+                crate::filesystem::vfs::get_link_true_file(argv[0].to_string_lossy().to_string())
+                    .unwrap();
+            argv[0] = CString::new(real).unwrap();
+        }
+
         Ok((path, argv, envp))
     }
 

+ 74 - 0
kernel/src/process/syscall/sys_pidfdopen.rs

@@ -0,0 +1,74 @@
+use crate::alloc::string::ToString;
+use crate::arch::interrupt::TrapFrame;
+use crate::arch::syscall::nr::SYS_PIDFD_OPEN;
+use crate::filesystem::vfs::file::File;
+use crate::filesystem::vfs::file::FileMode;
+use crate::filesystem::vfs::file::FilePrivateData;
+use crate::filesystem::vfs::syscall::ModeType;
+use crate::filesystem::vfs::FileType;
+use crate::process::pid::PidPrivateData;
+use crate::process::ProcessManager;
+use crate::syscall::table::FormattedSyscallParam;
+use crate::syscall::table::Syscall;
+use alloc::vec::Vec;
+use system_error::SystemError;
+
+/// Handler for the `pidfd_open` syscall.
+pub struct SysPidFdOpen;
+
+impl SysPidFdOpen {
+    /// First argument: the target pid (truncated to `i32`).
+    #[inline(always)]
+    fn pid(args: &[usize]) -> i32 {
+        args[0] as i32
+    }
+
+    /// Second argument: the flags (truncated to `u32`).
+    #[inline(always)]
+    fn flags(args: &[usize]) -> u32 {
+        args[1] as u32
+    }
+}
+
+impl Syscall for SysPidFdOpen {
+    fn num_args(&self) -> usize {
+        2
+    }
+
+    /// Creates a file descriptor that refers to the process `pid`.
+    ///
+    /// Not fully implemented, see
+    /// https://code.dragonos.org.cn/xref/linux-6.1.9/kernel/pid.c#pidfd_create
+    ///
+    /// ## Returns
+    /// The new file descriptor, or
+    /// - `EINVAL`: `pid` is not positive, or `flags` contains unsupported bits
+    /// - any error from creating the backing inode, opening the file or
+    ///   allocating the descriptor
+    fn handle(&self, args: &[usize], _frame: &mut TrapFrame) -> Result<usize, SystemError> {
+        let pid = Self::pid(args);
+        let flags = Self::flags(args);
+
+        // Both values come straight from user space: reject bad input instead of
+        // panicking on it.
+        if pid <= 0 {
+            return Err(SystemError::EINVAL);
+        }
+        let mode = ModeType::from_bits(flags).ok_or(SystemError::EINVAL)?;
+        let file_type = FileType::from(mode);
+        let file_mode = FileMode::from_bits(flags).ok_or(SystemError::EINVAL)?;
+
+        let root_inode = ProcessManager::current_mntns().root_inode();
+        let name = format!(
+            "Pidfd(from {} to {})",
+            ProcessManager::current_pcb().raw_pid().data(),
+            pid
+        );
+        let new_inode = root_inode.create(&name, file_type, mode)?;
+        let file = File::new(new_inode, file_mode)?;
+        // Tag the file as a pidfd for `pid`.
+        *file.private_data.lock() = FilePrivateData::Pid(PidPrivateData::new(pid));
+
+        // Store the file in the current process's fd table.
+        ProcessManager::current_pcb()
+            .fd_table()
+            .write()
+            .alloc_fd(file, None)
+            .map(|fd| fd as usize)
+    }
+
+    fn entry_format(&self, args: &[usize]) -> Vec<FormattedSyscallParam> {
+        vec![
+            FormattedSyscallParam::new("pid", format!("{:#x}", Self::pid(args))),
+            FormattedSyscallParam::new("flags", Self::flags(args).to_string()),
+        ]
+    }
+}
+
+syscall_table_macros::declare_syscall!(SYS_PIDFD_OPEN, SysPidFdOpen);

+ 67 - 0
kernel/src/process/syscall/sys_waitid.rs

@@ -0,0 +1,67 @@
+use crate::arch::interrupt::TrapFrame;
+use crate::arch::syscall::nr::SYS_WAITID;
+use crate::process::abi::WaitOption;
+use crate::process::exit::kernel_waitid;
+use crate::syscall::table::FormattedSyscallParam;
+use crate::syscall::table::Syscall;
+use alloc::vec::Vec;
+use core::ffi::c_int;
+use system_error::SystemError;
+
+/// Handler for the `waitid` syscall.
+pub struct SysWaitId;
+
+impl SysWaitId {
+    /// First argument: the id type (truncated to `i32`).
+    #[inline(always)]
+    fn idtype(args: &[usize]) -> i32 {
+        args[0] as i32
+    }
+
+    /// Second argument: the id (truncated to `i32`).
+    #[inline(always)]
+    fn id(args: &[usize]) -> i32 {
+        args[1] as i32
+    }
+
+    /// Third argument: the user-space signal-info pointer.
+    #[inline(always)]
+    fn siginfo(args: &[usize]) -> *mut i32 {
+        args[2] as *mut i32
+    }
+
+    /// Fourth argument: the wait options.
+    #[inline(always)]
+    fn options(args: &[usize]) -> c_int {
+        args[3] as c_int
+    }
+}
+
+impl Syscall for SysWaitId {
+    fn num_args(&self) -> usize {
+        4
+    }
+
+    /// Decodes the arguments and forwards them to `kernel_waitid`.
+    ///
+    /// Fails with `EINVAL` when the options contain bits that `WaitOption` does not
+    /// define.
+    fn handle(&self, args: &[usize], _frame: &mut TrapFrame) -> Result<usize, SystemError> {
+        let (idtype, id, siginfo) = (Self::idtype(args), Self::id(args), Self::siginfo(args));
+        let raw_options = Self::options(args) as u32;
+
+        match WaitOption::from_bits(raw_options) {
+            Some(options) => kernel_waitid(idtype, id, siginfo, options, None),
+            None => Err(SystemError::EINVAL),
+        }
+    }
+
+    fn entry_format(&self, args: &[usize]) -> Vec<FormattedSyscallParam> {
+        let siginfo_addr = Self::siginfo(args) as usize;
+        vec![
+            FormattedSyscallParam::new("idtype", format!("{:#x}", Self::idtype(args))),
+            FormattedSyscallParam::new("id", format!("{:#x}", Self::id(args))),
+            FormattedSyscallParam::new("siginfo", format!("{:#x}", siginfo_addr)),
+            FormattedSyscallParam::new("options", format!("{:#x}", Self::options(args))),
+        ]
+    }
+}
+
+syscall_table_macros::declare_syscall!(SYS_WAITID, SysWaitId);

+ 6 - 2
kernel/src/syscall/misc.rs

@@ -10,6 +10,8 @@ use system_error::SystemError;
 
 use super::{user_access::UserBufferWriter, Syscall};
 
+static mut UMASK: usize = 0o777;
+
 /// 系统信息
 ///
 /// 参考 https://code.dragonos.org.cn/xref/linux-6.1.9/include/uapi/linux/sysinfo.h#8
@@ -58,9 +60,11 @@ impl Syscall {
         return Ok(0);
     }
 
-    pub fn umask(_mask: u32) -> Result<usize, SystemError> {
+    pub fn umask(mask: u32) -> Result<usize, SystemError> {
         warn!("SYS_UMASK has not yet been implemented\n");
-        return Ok(0o777);
+        let r = unsafe { UMASK };
+        unsafe { UMASK = mask as usize };
+        return Ok(r);
     }
 
     /// ## 将随机字节填入buf

+ 0 - 5
kernel/src/syscall/mod.rs

@@ -309,11 +309,6 @@ impl Syscall {
 
             SYS_PPOLL => Self::ppoll(args[0], args[1] as u32, args[2], args[3]),
 
-            SYS_SIGALTSTACK => {
-                warn!("SYS_SIGALTSTACK has not yet been implemented");
-                Ok(0)
-            }
-
             SYS_SYSLOG => {
                 let syslog_action_type = args[0];
                 let buf_vaddr = args[1];