Browse Source

Merge remote-tracking branch 'upstream/master' into feat-network-rebuild

Samuka007 2 months ago
parent
commit
c4c35ed0cc
100 changed files with 3004 additions and 1042 deletions
  1. 8 8
      .github/workflows/makefile.yml
  2. 1 0
      docs/introduction/build_system.md
  3. 2 0
      docs/kernel/debug/debug-kernel-with-gdb.md
  4. 1 0
      docs/kernel/process_management/kthread.md
  5. 1 0
      docs/kernel/sched/cfs.md
  6. 2 2
      kernel/Cargo.toml
  7. 3 3
      kernel/crates/rbpf/src/interpreter.rs
  8. 6 6
      kernel/crates/rbpf/src/stack.rs
  9. 1 1
      kernel/crates/system_error/Cargo.toml
  10. 32 12
      kernel/crates/system_error/src/lib.rs
  11. 17 1
      kernel/src/arch/riscv64/ipc/signal.rs
  12. 2 0
      kernel/src/arch/riscv64/mod.rs
  13. 4 4
      kernel/src/arch/x86_64/asm/entry.S
  14. 27 0
      kernel/src/arch/x86_64/interrupt/mod.rs
  15. 152 80
      kernel/src/arch/x86_64/ipc/signal.rs
  16. 2 1
      kernel/src/arch/x86_64/mm/fault.rs
  17. 2 0
      kernel/src/arch/x86_64/syscall/mod.rs
  18. 1 0
      kernel/src/bpf/helper/consts.rs
  19. 6 0
      kernel/src/bpf/helper/mod.rs
  20. 1 1
      kernel/src/bpf/mod.rs
  21. 1 0
      kernel/src/driver/base/device/mod.rs
  22. 0 1
      kernel/src/driver/base/init.rs
  23. 2 6
      kernel/src/driver/net/dma.rs
  24. 1 1
      kernel/src/driver/tty/tty_device.rs
  25. 8 4
      kernel/src/driver/tty/tty_job_control.rs
  26. 11 7
      kernel/src/driver/tty/tty_ldisc/ntty.rs
  27. 2 6
      kernel/src/driver/virtio/virtio_impl.rs
  28. 46 0
      kernel/src/exception/entry.rs
  29. 1 0
      kernel/src/exception/mod.rs
  30. 39 13
      kernel/src/filesystem/eventfd.rs
  31. 98 29
      kernel/src/filesystem/fat/fs.rs
  32. 1 0
      kernel/src/filesystem/mod.rs
  33. 346 0
      kernel/src/filesystem/page_cache.rs
  34. 26 1
      kernel/src/filesystem/procfs/mod.rs
  35. 25 108
      kernel/src/filesystem/vfs/file.rs
  36. 58 6
      kernel/src/filesystem/vfs/mod.rs
  37. 23 5
      kernel/src/filesystem/vfs/mount.rs
  38. 16 0
      kernel/src/filesystem/vfs/syscall.rs
  39. 4 1
      kernel/src/ipc/pipe.rs
  40. 27 35
      kernel/src/ipc/shm.rs
  41. 202 30
      kernel/src/ipc/signal.rs
  42. 11 5
      kernel/src/ipc/signal_types.rs
  43. 73 25
      kernel/src/ipc/syscall.rs
  44. 1 1
      kernel/src/libs/semaphore.rs
  45. 86 34
      kernel/src/libs/wait_queue.rs
  46. 189 0
      kernel/src/misc/events/kprobe/device.rs
  47. 31 0
      kernel/src/misc/events/kprobe/mod.rs
  48. 28 0
      kernel/src/misc/events/mod.rs
  49. 52 0
      kernel/src/misc/events/subsys.rs
  50. 1 0
      kernel/src/misc/mod.rs
  51. 4 26
      kernel/src/mm/allocator/page_frame.rs
  52. 23 38
      kernel/src/mm/fault.rs
  53. 342 144
      kernel/src/mm/page.rs
  54. 34 18
      kernel/src/mm/ucontext.rs
  55. 10 4
      kernel/src/net/event_poll/mod.rs
  56. 2 2
      kernel/src/net/event_poll/syscall.rs
  57. 68 60
      kernel/src/perf/bpf.rs
  58. 10 7
      kernel/src/perf/kprobe.rs
  59. 2 1
      kernel/src/perf/mod.rs
  60. 63 37
      kernel/src/process/exit.rs
  61. 136 51
      kernel/src/process/mod.rs
  62. 11 8
      kernel/src/sched/completion.rs
  63. 11 2
      kernel/src/syscall/mod.rs
  64. 1 1
      tools/BUILD_CONTAINER_VERSION
  65. 11 10
      tools/bootstrap.sh
  66. 1 1
      tools/build_in_docker.sh
  67. 2 1
      tools/change_rust_src.sh
  68. 3 2
      tools/run-qemu.sh
  69. 1 0
      user/apps/syscall_ebpf/.gitignore
  70. 31 0
      user/apps/syscall_ebpf/Cargo.toml
  71. 6 10
      user/apps/syscall_ebpf/Makefile
  72. 33 0
      user/apps/syscall_ebpf/README.md
  73. 4 0
      user/apps/syscall_ebpf/rustfmt.toml
  74. 1 1
      user/apps/syscall_ebpf/syscall_ebpf-common/Cargo.toml
  75. 0 0
      user/apps/syscall_ebpf/syscall_ebpf-common/src/lib.rs
  76. 12 0
      user/apps/syscall_ebpf/syscall_ebpf-ebpf/.cargo/config.toml
  77. 17 0
      user/apps/syscall_ebpf/syscall_ebpf-ebpf/Cargo.toml
  78. 17 0
      user/apps/syscall_ebpf/syscall_ebpf-ebpf/build.rs
  79. 3 0
      user/apps/syscall_ebpf/syscall_ebpf-ebpf/rust-toolchain.toml
  80. 3 0
      user/apps/syscall_ebpf/syscall_ebpf-ebpf/src/lib.rs
  81. 50 0
      user/apps/syscall_ebpf/syscall_ebpf-ebpf/src/main.rs
  82. 35 0
      user/apps/syscall_ebpf/syscall_ebpf/Cargo.toml
  83. 150 0
      user/apps/syscall_ebpf/syscall_ebpf/build.rs
  84. 74 0
      user/apps/syscall_ebpf/syscall_ebpf/src/main.rs
  85. 1 0
      user/apps/test-sigprocmask/.gitignore
  86. 20 0
      user/apps/test-sigprocmask/Makefile
  87. 132 0
      user/apps/test-sigprocmask/main.c
  88. 0 3
      user/apps/test_ebpf/.gitignore
  89. 0 16
      user/apps/test_ebpf/Cargo.toml
  90. 0 60
      user/apps/test_ebpf/src/main.rs
  91. 0 2
      user/apps/test_ebpf/syscall_ebpf/.cargo/config.toml
  92. 0 3
      user/apps/test_ebpf/syscall_ebpf/.vscode/settings.json
  93. 0 3
      user/apps/test_ebpf/syscall_ebpf/Cargo.toml
  94. 0 32
      user/apps/test_ebpf/syscall_ebpf/README.md
  95. 0 6
      user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.cargo/config.toml
  96. 0 2
      user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.helix/config.toml
  97. 0 4
      user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.vim/coc-settings.json
  98. 0 4
      user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.vscode/settings.json
  99. 0 33
      user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/Cargo.toml
  100. 0 13
      user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/rust-toolchain.toml

+ 8 - 8
.github/workflows/makefile.yml

@@ -11,14 +11,14 @@ jobs:
     name: Format check ${{ matrix.arch }}
     runs-on: ubuntu-latest
     continue-on-error: true
-    container: dragonos/dragonos-dev:v1.7
+    container: dragonos/dragonos-dev:v1.8
 
     strategy:
       matrix:
         arch: [x86_64, riscv64]
 
     steps:
-      - run: echo "Running in dragonos/dragonos-dev:v1.7"
+      - run: echo "Running in dragonos/dragonos-dev:v1.8"
       - uses: actions/checkout@v3
 
       - name: Format check
@@ -35,14 +35,14 @@ jobs:
     name: Kernel static test ${{ matrix.arch }}
     runs-on: ubuntu-latest
     continue-on-error: true
-    container: dragonos/dragonos-dev:v1.7
+    container: dragonos/dragonos-dev:v1.8
 
     strategy:
       matrix:
         arch: [x86_64, riscv64]
 
     steps:
-      - run: echo "Running in dragonos/dragonos-dev:v1.7"
+      - run: echo "Running in dragonos/dragonos-dev:v1.8"
 
       - uses: actions/checkout@v3
 
@@ -55,10 +55,10 @@ jobs:
 
   build-x86_64:
     runs-on: ubuntu-latest
-    container: dragonos/dragonos-dev:v1.7
+    container: dragonos/dragonos-dev:v1.8
 
     steps:
-      - run: echo "Running in dragonos/dragonos-dev:v1.7"
+      - run: echo "Running in dragonos/dragonos-dev:v1.8"
 
       - uses: actions/checkout@v3
       - name: build the DragonOS
@@ -77,10 +77,10 @@ jobs:
 
   build-riscv64:
     runs-on: ubuntu-latest
-    container: dragonos/dragonos-dev:v1.7
+    container: dragonos/dragonos-dev:v1.8
 
     steps:
-      - run: echo "Running in dragonos/dragonos-dev:v1.7"
+      - run: echo "Running in dragonos/dragonos-dev:v1.8"
 
       - uses: actions/checkout@v3
         with:

+ 1 - 0
docs/introduction/build_system.md

@@ -215,6 +215,7 @@ make run-docker
 ### 5.1 创建磁盘镜像
 
   首先,您需要使用**普通用户**权限运行`tools/create_hdd_image.sh`,为DragonOS创建一块磁盘镜像文件。该脚本会自动完成创建磁盘镜像的工作,并将其移动到`bin/`目录下。
+
   请注意,由于权限问题,请务必使用**普通用户**权限运行此脚本。(运行后,需要提升权限时,系统可能会要求您输入密码)
 
 

+ 2 - 0
docs/kernel/debug/debug-kernel-with-gdb.md

@@ -3,6 +3,7 @@
 
 ## 前言
   GDB是一个功能强大的开源调试工具,能够帮助您更好的诊断和修复程序中的错误。
+
   它提供了一套丰富的功能,使您能够检查程序的执行状态、跟踪代码的执行流程、查看和修改变量的值、分析内存状态等。它可以与编译器配合使用,以便您在调试过程中访问程序的调试信息。
 
   此教程将告诉您如何在DragonOS中使用`rust-gdb`来调试内核,包括如何开始调试以及相应的调试命令。
@@ -29,6 +30,7 @@ debug = true
 ### 1.2 运行DragonOS
 
   准备工作完成后,您就可以编译、运行DragonOS来开展后续的调试工作了。
+
   在DragonOS根目录中开启终端,使用`make run`即可开始编译运行DragonOS,如需更多编译命令方面的帮助,详见
 > [构建DragonOS](https://docs.dragonos.org/zh_CN/latest/introduction/build_system.html)。
 

+ 1 - 0
docs/kernel/process_management/kthread.md

@@ -9,6 +9,7 @@
   内核线程的创建是通过调用`KernelThreadMechanism::create()`或者`KernelThreadMechanism::create_and_run()`函数,向`kthreadd`守护线程发送创建任务来实现的。也就是说,内核线程的创建,最终是由`kthread_daemon`来完成。
 
   当内核线程被创建后,默认处于睡眠状态,要使用`ProcessManager::wakeup`函数将其唤醒。
+
   当内核其他模块想要停止一个内核线程的时候,可以调用`KernelThreadMechanism::stop()`函数,等待内核线程的退出,然后获得返回值并清理内核线程的pcb。
 
   内核线程应当经常检查`KernelThreadMechanism::should_stop()`的结果,以确定其是否要退出。当检测到需要退出时,内核线程返回一个返回码,即可退出。(注意资源的清理)

+ 1 - 0
docs/kernel/sched/cfs.md

@@ -16,6 +16,7 @@
   ``FairSchedEntity``是完全公平调度器中最重要的结构体,他代表一个实体单位,它不止表示一个进程,它还可以是一个组或者一个用户,但是它在cfs队列中所表示的就单单是一个调度实体。这样的设计可以为上层提供更多的思路,比如上层可以把不同的进程归纳到一个调度实体从而实现组调度等功能而不需要改变调度算法。
 
   在cfs中,整体的结构是**一棵树**,每一个调度实体作为``cfs_rq``中的一个节点,若该调度实体不是单个进程(它可能是一个进程组),则在该调度实体中还需要维护一个自己的``cfs_rq``,这样的嵌套展开后,每一个叶子节点就是一个单独的进程。需要理解这样一棵树,**在后续文档中会以这棵树为核心讲解**。
+
   该结构体具体的字段意义请查阅源代码。这里提及几个重要的字段:
 
 

+ 2 - 2
kernel/Cargo.toml

@@ -63,7 +63,6 @@ paste = "=1.0.14"
 slabmalloc = { path = "crates/rust-slabmalloc" }
 log = "0.4.21"
 kprobe = { path = "crates/kprobe" }
-xarray = "0.1.0"
 lru = "0.12.3"
 
 rbpf = { path = "crates/rbpf" }
@@ -76,6 +75,7 @@ unwinding = { git = "https://git.mirrors.dragonos.org.cn/DragonOS-Community/unwi
     "panic",
     "personality"
 ]}
+defer = "0.2.1"
 
 # target为x86_64时,使用下面的依赖
 [target.'cfg(target_arch = "x86_64")'.dependencies]
@@ -106,4 +106,4 @@ debug = true   # Controls whether the compiler passes `-g`
 
 # The release profile, used for `cargo build --release`
 [profile.release]
-debug = false
+debug = true

+ 3 - 3
kernel/crates/rbpf/src/interpreter.rs

@@ -660,9 +660,9 @@ pub fn execute_program(
                             // Save the callee saved registers
                             pre_stack.save_registers(&reg[6..=9]);
                             // Save the return address
-                            pre_stack.save_return_address(insn_ptr as u16);
+                            pre_stack.save_return_address(insn_ptr as u64);
                             // save the stack pointer
-                            pre_stack.save_sp(reg[10] as u16);
+                            pre_stack.save_sp(reg[10]);
                             let mut stack = StackFrame::new();
                             log::trace!("BPF TO BPF CALL: new pc: {} + {} = {}",insn_ptr ,insn.imm,insn_ptr + insn.imm as usize);
                             reg[10] = stack.as_ptr() as u64 + stack.len() as u64;
@@ -695,7 +695,7 @@ pub fn execute_program(
                     // Restore the return address
                     insn_ptr = stack.get_return_address() as usize;
                     // Restore the stack pointer
-                    reg[10] = stack.get_sp() as u64;
+                    reg[10] = stack.get_sp();
                     log::trace!("EXIT: new pc: {}", insn_ptr);
                 }
             }

+ 6 - 6
kernel/crates/rbpf/src/stack.rs

@@ -1,9 +1,9 @@
 use crate::{ebpf::STACK_SIZE, vec, Vec};
 
 pub struct StackFrame {
-    return_address: u16,
+    return_address: u64,
     saved_registers: [u64; 4],
-    sp: u16,
+    sp: u64,
     frame: Vec<u8>,
 }
 
@@ -54,22 +54,22 @@ impl StackFrame {
     }
 
     /// Save the return address
-    pub fn save_return_address(&mut self, address: u16) {
+    pub fn save_return_address(&mut self, address: u64) {
         self.return_address = address;
     }
 
     /// Get the return address
-    pub fn get_return_address(&self) -> u16 {
+    pub fn get_return_address(&self) -> u64 {
         self.return_address
     }
 
     /// Save the stack pointer
-    pub fn save_sp(&mut self, sp: u16) {
+    pub fn save_sp(&mut self, sp: u64) {
         self.sp = sp;
     }
 
     /// Get the stack pointer
-    pub fn get_sp(&self) -> u16 {
+    pub fn get_sp(&self) -> u64 {
         self.sp
     }
 }

+ 1 - 1
kernel/crates/system_error/Cargo.toml

@@ -7,4 +7,4 @@ edition = "2021"
 
 [dependencies]
 num-traits = { git = "https://git.mirrors.dragonos.org.cn/DragonOS-Community/num-traits.git", rev="1597c1c", default-features = false }
-num-derive = "0.3"
+num-derive = "0.3"

+ 32 - 12
kernel/crates/system_error/src/lib.rs

@@ -277,31 +277,51 @@ pub enum SystemError {
 
     // === 以下错误码不应该被用户态程序使用 ===
     ERESTARTSYS = 512,
+    ERESTARTNOINTR = 513,
+    /// restart if no handler
+    ERESTARTNOHAND = 514,
+
+    /// 没有对应的ioctlcmd
+    ENOIOCTLCMD = 515,
+    /// restart by calling sys restart syscall
+    ERESTART_RESTARTBLOCK = 516,
+
+    // === TODO: 这几个KVM的错误码不要放在这里 ===
+
     // VMX on 虚拟化开启指令出错
-    EVMXONFailed = 513,
+    EVMXONFailed = 1513,
     // VMX off 虚拟化关闭指令出错
-    EVMXOFFFailed = 514,
+    EVMXOFFFailed = 1514,
     // VMX VMWRITE 写入虚拟化VMCS内存出错
-    EVMWRITEFailed = 515,
-    EVMREADFailed = 516,
-    EVMPRTLDFailed = 517,
-    EVMLAUNCHFailed = 518,
-    KVM_HVA_ERR_BAD = 519,
-    /// 没有对应的ioctlcmd
-    ENOIOCTLCMD = 520,
+    EVMWRITEFailed = 1515,
+    EVMREADFailed = 1516,
+    EVMPRTLDFailed = 1517,
+    EVMLAUNCHFailed = 1518,
+    KVM_HVA_ERR_BAD = 1519,
+
+    MAXERRNO = 4095,
 }
 
 impl SystemError {
-    /// @brief 把posix错误码转换为系统错误枚举类型。
+    /// 判断一个值是否是有效的posix错误码。
+    pub fn is_valid_posix_errno<T>(val: T) -> bool
+    where
+        T: PartialOrd + From<i32>,
+    {
+        let max_errno = T::from(-(Self::MAXERRNO as i32));
+        val < T::from(0) && val >= max_errno
+    }
+
+    /// 尝试把posix错误码转换为系统错误枚举类型。
     pub fn from_posix_errno(errno: i32) -> Option<SystemError> {
         // posix 错误码是小于0的
-        if errno >= 0 {
+        if !Self::is_valid_posix_errno(errno) {
             return None;
         }
         return <Self as num_traits::FromPrimitive>::from_i32(-errno);
     }
 
-    /// @brief 把系统错误枚举类型转换为负数posix错误码。
+    /// 把系统错误枚举类型转换为负数posix错误码。
     pub fn to_posix_errno(&self) -> i32 {
         return -<Self as num_traits::ToPrimitive>::to_i32(self).unwrap();
     }

+ 17 - 1
kernel/src/arch/riscv64/ipc/signal.rs

@@ -1,8 +1,9 @@
 use log::error;
 
 use crate::{
-    arch::{sched::sched, CurrentIrqArch},
+    arch::{interrupt::TrapFrame, sched::sched, CurrentIrqArch},
     exception::InterruptArch,
+    ipc::signal_types::SignalArch,
     process::ProcessManager,
 };
 
@@ -339,3 +340,18 @@ fn sig_continue(sig: Signal) {
 fn sig_ignore(_sig: Signal) {
     return;
 }
+
+pub struct RiscV64SignalArch;
+
+impl SignalArch for RiscV64SignalArch {
+    // TODO: 为RISCV64实现信号处理
+    // 注意,rv64现在在中断/系统调用返回用户态时,没有进入 irqentry_exit() 函数,
+    // 到时候实现信号处理时,需要修改中断/系统调用返回用户态的代码,进入 irqentry_exit() 函数
+    unsafe fn do_signal_or_restart(_frame: &mut TrapFrame) {
+        todo!()
+    }
+
+    fn sys_rt_sigreturn(_trap_frame: &mut TrapFrame) -> u64 {
+        todo!()
+    }
+}

+ 2 - 0
kernel/src/arch/riscv64/mod.rs

@@ -27,6 +27,8 @@ pub use self::time::RiscV64TimeArch as CurrentTimeArch;
 
 pub use self::elf::RiscV64ElfArch as CurrentElfArch;
 
+pub use self::ipc::signal::RiscV64SignalArch as CurrentSignalArch;
+
 pub use crate::arch::smp::RiscV64SMPArch as CurrentSMPArch;
 
 pub use crate::arch::sched::RiscV64SchedArch as CurrentSchedArch;

+ 4 - 4
kernel/src/arch/x86_64/asm/entry.S

@@ -64,9 +64,9 @@ ENTRY(ret_from_intr)
     // 进入信号处理流程
     cli
 
-    // 将原本要返回的栈帧的栈指针传入do_signal的第一个参数
+    // 将原本要返回的栈帧的栈指针传入irqentry_exit的第一个参数
     movq %rsp, %rdi
-    callq do_signal
+    callq irqentry_exit
     cli
 
 __entry_ret_from_intr_before_gs_check_2:
@@ -375,10 +375,10 @@ ENTRY(syscall_64)
     sti
     callq *%rdx //调用服务程序
 
-    // 将原本要返回的栈帧的栈指针传入do_signal的第一个参数
+    // 将原本要返回的栈帧的栈指针传入 irqentry_exit 的第一个参数
     movq %rsp, %rdi
 
-    callq do_signal
+    callq irqentry_exit
 
     cli
     

+ 27 - 0
kernel/src/arch/x86_64/interrupt/mod.rs

@@ -125,6 +125,8 @@ pub struct TrapFrame {
     pub es: ::core::ffi::c_ulong,
     pub rax: ::core::ffi::c_ulong,
     pub func: ::core::ffi::c_ulong,
+    /// - 该字段在异常发生时,保存的是错误码
+    /// - 在系统调用时,由系统调用入口函数将其设置为系统调用号
     pub errcode: ::core::ffi::c_ulong,
     pub rip: ::core::ffi::c_ulong,
     pub cs: ::core::ffi::c_ulong,
@@ -182,6 +184,31 @@ impl TrapFrame {
     pub fn set_pc(&mut self, pc: usize) {
         self.rip = pc as u64;
     }
+
+    /// 获取系统调用号
+    ///
+    /// # Safety
+    /// 该函数只能在系统调用上下文中调用,
+    /// 在其他上下文中,该函数返回值未定义
+    pub unsafe fn syscall_nr(&self) -> Option<usize> {
+        if self.errcode == u64::MAX {
+            return None;
+        }
+        Some(self.errcode as usize)
+    }
+
+    /// 获取系统调用错误码
+    ///
+    /// # Safety
+    /// 该函数只能在系统调用上下文中调用,
+    /// 在其他上下文中,该函数返回值未定义
+    ///
+    /// # Returns
+    /// 返回一个 `Option<SystemError>`,表示系统调用的错误码。
+    pub unsafe fn syscall_error(&self) -> Option<SystemError> {
+        let val = self.rax as i32;
+        SystemError::from_posix_errno(val)
+    }
 }
 
 impl ProbeArgs for TrapFrame {

+ 152 - 80
kernel/src/arch/x86_64/ipc/signal.rs

@@ -1,5 +1,6 @@
 use core::{ffi::c_void, intrinsics::unlikely, mem::size_of};
 
+use defer::defer;
 use log::error;
 use system_error::SystemError;
 
@@ -8,11 +9,12 @@ use crate::{
         fpu::FpState,
         interrupt::TrapFrame,
         process::table::{USER_CS, USER_DS},
+        syscall::nr::SYS_RESTART_SYSCALL,
         CurrentIrqArch, MMArch,
     },
     exception::InterruptArch,
     ipc::{
-        signal::set_current_sig_blocked,
+        signal::{restore_saved_sigmask, set_current_blocked},
         signal_types::{SaHandlerType, SigInfo, Sigaction, SigactionType, SignalArch},
     },
     mm::MemoryManagementArch,
@@ -405,99 +407,147 @@ pub struct SigStack {
     pub fpstate: FpState,
 }
 
-#[no_mangle]
-unsafe extern "C" fn do_signal(frame: &mut TrapFrame) {
-    X86_64SignalArch::do_signal(frame);
-    return;
-}
+unsafe fn do_signal(frame: &mut TrapFrame, got_signal: &mut bool) {
+    let pcb = ProcessManager::current_pcb();
 
-pub struct X86_64SignalArch;
+    let siginfo = pcb.try_siginfo_irqsave(5);
 
-impl SignalArch for X86_64SignalArch {
-    unsafe fn do_signal(frame: &mut TrapFrame) {
-        let pcb = ProcessManager::current_pcb();
+    if unlikely(siginfo.is_none()) {
+        return;
+    }
 
-        let siginfo = pcb.try_siginfo_irqsave(5);
+    let siginfo_read_guard = siginfo.unwrap();
 
-        if unlikely(siginfo.is_none()) {
-            return;
-        }
+    // 检查sigpending是否为0
+    if siginfo_read_guard.sig_pending().signal().bits() == 0 || !frame.is_from_user() {
+        // 若没有正在等待处理的信号,或者将要返回到的是内核态,则返回
+        return;
+    }
 
-        let siginfo_read_guard = siginfo.unwrap();
+    let mut sig_number: Signal;
+    let mut info: Option<SigInfo>;
+    let mut sigaction: Option<Sigaction>;
+    let sig_block: SigSet = *siginfo_read_guard.sig_blocked();
+    drop(siginfo_read_guard);
 
-        // 检查sigpending是否为0
-        if siginfo_read_guard.sig_pending().signal().bits() == 0 || !frame.is_from_user() {
-            // 若没有正在等待处理的信号,或者将要返回到的是内核态,则返回
-            return;
-        }
+    let sig_guard = pcb.try_sig_struct_irqsave(5);
+    if unlikely(sig_guard.is_none()) {
+        return;
+    }
+    let siginfo_mut = pcb.try_siginfo_mut(5);
+    if unlikely(siginfo_mut.is_none()) {
+        return;
+    }
 
-        let mut sig_number: Signal;
-        let mut info: Option<SigInfo>;
-        let mut sigaction: Sigaction;
-        let sig_block: SigSet = *siginfo_read_guard.sig_block();
-        drop(siginfo_read_guard);
+    let sig_guard = sig_guard.unwrap();
+    let mut siginfo_mut_guard = siginfo_mut.unwrap();
+    loop {
+        (sig_number, info) = siginfo_mut_guard.dequeue_signal(&sig_block, &pcb);
 
-        let sig_guard = pcb.try_sig_struct_irqsave(5);
-        if unlikely(sig_guard.is_none()) {
+        // 如果信号非法,则直接返回
+        if sig_number == Signal::INVALID {
             return;
         }
-        let siginfo_mut = pcb.try_siginfo_mut(5);
-        if unlikely(siginfo_mut.is_none()) {
-            return;
+        let sa = sig_guard.handlers[sig_number as usize - 1];
+
+        match sa.action() {
+            SigactionType::SaHandler(action_type) => match action_type {
+                SaHandlerType::Error => {
+                    error!("Trying to handle a Sigerror on Process:{:?}", pcb.pid());
+                    return;
+                }
+                SaHandlerType::Default => {
+                    sigaction = Some(sa);
+                }
+                SaHandlerType::Ignore => continue,
+                SaHandlerType::Customized(_) => {
+                    sigaction = Some(sa);
+                }
+            },
+            SigactionType::SaSigaction(_) => todo!(),
         }
 
-        let sig_guard = sig_guard.unwrap();
-        let mut siginfo_mut_guard = siginfo_mut.unwrap();
-        loop {
-            (sig_number, info) = siginfo_mut_guard.dequeue_signal(&sig_block);
-            // 如果信号非法,则直接返回
-            if sig_number == Signal::INVALID {
-                return;
-            }
+        if sigaction.is_some() {
+            break;
+        }
+    }
 
-            sigaction = sig_guard.handlers[sig_number as usize - 1];
+    let oldset = *siginfo_mut_guard.sig_blocked();
+    //避免死锁
+    drop(siginfo_mut_guard);
+    drop(sig_guard);
+    drop(pcb);
+    // 做完上面的检查后,开中断
+    CurrentIrqArch::interrupt_enable();
 
-            match sigaction.action() {
-                SigactionType::SaHandler(action_type) => match action_type {
-                    SaHandlerType::Error => {
-                        error!("Trying to handle a Sigerror on Process:{:?}", pcb.pid());
-                        return;
-                    }
-                    SaHandlerType::Default => {
-                        sigaction = Sigaction::default();
-                        break;
-                    }
-                    SaHandlerType::Ignore => continue,
-                    SaHandlerType::Customized(_) => {
-                        break;
-                    }
-                },
-                SigactionType::SaSigaction(_) => todo!(),
-            }
-            // 如果当前动作是忽略这个信号,就继续循环。
+    if sigaction.is_none() {
+        return;
+    }
+    *got_signal = true;
+
+    let mut sigaction = sigaction.unwrap();
+
+    // 注意!由于handle_signal里面可能会退出进程,
+    // 因此这里需要检查清楚:上面所有的锁、arc指针都被释放了。否则会产生资源泄露的问题!
+    let res: Result<i32, SystemError> =
+        handle_signal(sig_number, &mut sigaction, &info.unwrap(), &oldset, frame);
+    if res.is_err() {
+        error!(
+            "Error occurred when handling signal: {}, pid={:?}, errcode={:?}",
+            sig_number as i32,
+            ProcessManager::current_pcb().pid(),
+            res.as_ref().unwrap_err()
+        );
+    }
+}
+
+fn try_restart_syscall(frame: &mut TrapFrame) {
+    defer!({
+        // 如果没有信号需要传递,我们只需恢复保存的信号掩码
+        restore_saved_sigmask();
+    });
+
+    if unsafe { frame.syscall_nr() }.is_none() {
+        return;
+    }
+
+    let syscall_err = unsafe { frame.syscall_error() };
+    if syscall_err.is_none() {
+        return;
+    }
+    let syscall_err = syscall_err.unwrap();
+
+    let mut restart = false;
+    match syscall_err {
+        SystemError::ERESTARTSYS | SystemError::ERESTARTNOHAND | SystemError::ERESTARTNOINTR => {
+            frame.rax = frame.errcode;
+            frame.rip -= 2;
+            restart = true;
         }
+        SystemError::ERESTART_RESTARTBLOCK => {
+            frame.rax = SYS_RESTART_SYSCALL as u64;
+            frame.rip -= 2;
+            restart = true;
+        }
+        _ => {}
+    }
+    log::debug!("try restart syscall: {:?}", restart);
+}
+
+pub struct X86_64SignalArch;
 
-        let oldset = *siginfo_mut_guard.sig_block();
-        //避免死锁
-        drop(siginfo_mut_guard);
-        drop(sig_guard);
-        drop(pcb);
-
-        // 做完上面的检查后,开中断
-        CurrentIrqArch::interrupt_enable();
-
-        // 注意!由于handle_signal里面可能会退出进程,
-        // 因此这里需要检查清楚:上面所有的锁、arc指针都被释放了。否则会产生资源泄露的问题!
-        let res: Result<i32, SystemError> =
-            handle_signal(sig_number, &mut sigaction, &info.unwrap(), &oldset, frame);
-        if res.is_err() {
-            error!(
-                "Error occurred when handling signal: {}, pid={:?}, errcode={:?}",
-                sig_number as i32,
-                ProcessManager::current_pcb().pid(),
-                res.as_ref().unwrap_err()
-            );
+impl SignalArch for X86_64SignalArch {
+    /// 处理信号,并尝试重启系统调用
+    ///
+    /// 参考: https://code.dragonos.org.cn/xref/linux-6.1.9/arch/x86/kernel/signal.c#865
+    unsafe fn do_signal_or_restart(frame: &mut TrapFrame) {
+        let mut got_signal = false;
+        do_signal(frame, &mut got_signal);
+
+        if got_signal {
+            return;
         }
+        try_restart_syscall(frame);
     }
 
     fn sys_rt_sigreturn(trap_frame: &mut TrapFrame) -> u64 {
@@ -511,7 +561,7 @@ impl SignalArch for X86_64SignalArch {
             return trap_frame.rax;
         }
         let mut sigmask: SigSet = unsafe { (*frame).context.oldmask };
-        set_current_sig_blocked(&mut sigmask);
+        set_current_blocked(&mut sigmask);
         // 从用户栈恢复sigcontext
         if !unsafe { &mut (*frame).context }.restore_sigcontext(trap_frame) {
             error!("unable to restore sigcontext");
@@ -533,6 +583,8 @@ impl SignalArch for X86_64SignalArch {
 /// @param regs 之前的系统调用将要返回的时候,要弹出的栈帧的拷贝
 ///
 /// @return Result<0,SystemError> 若Error, 则返回错误码,否则返回Ok(0)
+///
+/// 参考 https://code.dragonos.org.cn/xref/linux-6.1.9/arch/x86/kernel/signal.c#787
 fn handle_signal(
     sig: Signal,
     sigaction: &mut Sigaction,
@@ -540,8 +592,28 @@ fn handle_signal(
     oldset: &SigSet,
     frame: &mut TrapFrame,
 ) -> Result<i32, SystemError> {
-    // TODO 这里要补充一段逻辑,好像是为了保证引入线程之后的地址空间不会出问题。详见https://code.dragonos.org.cn/xref/linux-6.1.9/arch/mips/kernel/signal.c#830
-
+    if unsafe { frame.syscall_nr() }.is_some() {
+        if let Some(syscall_err) = unsafe { frame.syscall_error() } {
+            match syscall_err {
+                SystemError::ERESTARTNOHAND | SystemError::ERESTART_RESTARTBLOCK => {
+                    frame.rax = SystemError::EINTR.to_posix_errno() as i64 as u64;
+                }
+                SystemError::ERESTARTSYS => {
+                    if !sigaction.flags().contains(SigFlags::SA_RESTART) {
+                        frame.rax = SystemError::EINTR.to_posix_errno() as i64 as u64;
+                    } else {
+                        frame.rax = frame.errcode;
+                        frame.rip -= 2;
+                    }
+                }
+                SystemError::ERESTARTNOINTR => {
+                    frame.rax = frame.errcode;
+                    frame.rip -= 2;
+                }
+                _ => {}
+            }
+        }
+    }
     // 设置栈帧
     return setup_frame(sig, sigaction, info, oldset, frame);
 }

+ 2 - 1
kernel/src/arch/x86_64/mm/fault.rs

@@ -267,9 +267,10 @@ impl X86_64MMArch {
                         });
                 } else {
                     log::error!(
-                        "No mapped vma, error_code: {:#b}, address: {:#x}",
+                        "No mapped vma, error_code: {:#b}, address: {:#x}, flags: {:?}",
                         error_code,
                         address.data(),
+                        flags
                     );
                     let pid = ProcessManager::current_pid();
                     let mut info =

+ 2 - 0
kernel/src/arch/x86_64/syscall/mod.rs

@@ -82,6 +82,8 @@ macro_rules! normal_syscall_return {
 
 #[no_mangle]
 pub extern "sysv64" fn syscall_handler(frame: &mut TrapFrame) {
+    // 系统调用进入时,把系统调用号存入errcode字段,以便在syscall_handler退出后,仍能获取到系统调用号
+    frame.errcode = frame.rax;
     let syscall_num = frame.rax as usize;
     // 防止sys_sched由于超时无法退出导致的死锁
     if syscall_num == SYS_SCHED {

+ 1 - 0
kernel/src/bpf/helper/consts.rs

@@ -1,6 +1,7 @@
 pub const HELPER_MAP_LOOKUP_ELEM: u32 = 1;
 pub const HELPER_MAP_UPDATE_ELEM: u32 = 2;
 pub const HELPER_MAP_DELETE_ELEM: u32 = 3;
+pub const HELPER_KTIME_GET_NS: u32 = 5;
 pub const HELPER_MAP_FOR_EACH_ELEM: u32 = 164;
 pub const HELPER_MAP_LOOKUP_PERCPU_ELEM: u32 = 195;
 pub const HELPER_PERF_EVENT_OUTPUT: u32 = 25;

+ 6 - 0
kernel/src/bpf/helper/mod.rs

@@ -6,6 +6,7 @@ use crate::bpf::map::{BpfCallBackFn, BpfMap};
 use crate::include::bindings::linux_bpf::BPF_F_CURRENT_CPU;
 use crate::libs::lazy_init::Lazy;
 use crate::smp::core::smp_get_processor_id;
+use crate::time::Instant;
 use alloc::{collections::BTreeMap, sync::Arc};
 use core::ffi::c_void;
 use system_error::SystemError;
@@ -300,6 +301,10 @@ pub fn map_peek_elem(map: &Arc<BpfMap>, value: &mut [u8]) -> Result<()> {
     value
 }
 
+pub fn bpf_ktime_get_ns() -> u64 {
+    (Instant::now().total_micros() * 1000) as u64
+}
+
 pub static BPF_HELPER_FUN_SET: Lazy<BTreeMap<u32, RawBPFHelperFn>> = Lazy::new();
 
 /// Initialize the helper functions.
@@ -311,6 +316,7 @@ pub fn init_helper_functions() {
         map.insert(HELPER_MAP_LOOKUP_ELEM, define_func!(raw_map_lookup_elem));
         map.insert(HELPER_MAP_UPDATE_ELEM, define_func!(raw_map_update_elem));
         map.insert(HELPER_MAP_DELETE_ELEM, define_func!(raw_map_delete_elem));
+        map.insert(HELPER_KTIME_GET_NS, define_func!(bpf_ktime_get_ns));
         map.insert(
             HELPER_MAP_FOR_EACH_ELEM,
             define_func!(raw_map_for_each_elem),

+ 1 - 1
kernel/src/bpf/mod.rs

@@ -33,7 +33,7 @@ pub fn bpf(cmd: bpf_cmd, attr: &bpf_attr) -> Result<usize> {
         // Program related commands
         bpf_cmd::BPF_PROG_LOAD => prog::bpf_prog_load(attr),
         // Object creation commands
-        bpf_cmd::BPF_BTF_LOAD => {
+        bpf_cmd::BPF_BTF_LOAD | bpf_cmd::BPF_LINK_CREATE | bpf_cmd::BPF_OBJ_GET_INFO_BY_FD => {
             error!("bpf cmd {:?} not implemented", cmd);
             return Err(SystemError::ENOSYS);
         }

+ 1 - 0
kernel/src/driver/base/device/mod.rs

@@ -310,6 +310,7 @@ pub enum DeviceType {
     PlatformDev,
     Char,
     Pci,
+    Other,
 }
 
 /// @brief: 设备标识符类型

+ 0 - 1
kernel/src/driver/base/init.rs

@@ -21,7 +21,6 @@ pub fn driver_init() -> Result<(), SystemError> {
     platform_bus_init()?;
     serio_bus_init()?;
     CpuDeviceManager::init()?;
-
     // 至此,已完成设备驱动模型的初始化
     return Ok(());
 }

+ 2 - 6
kernel/src/driver/net/dma.rs

@@ -3,7 +3,7 @@ use crate::arch::mm::kernel_page_flags;
 use crate::arch::MMArch;
 
 use crate::mm::kernel_mapper::KernelMapper;
-use crate::mm::page::{page_manager_lock_irqsave, EntryFlags};
+use crate::mm::page::EntryFlags;
 use crate::mm::{
     allocator::page_frame::{
         allocate_page_frames, deallocate_page_frames, PageFrameCount, PhysPageFrame,
@@ -61,11 +61,7 @@ pub unsafe fn dma_dealloc(paddr: usize, vaddr: NonNull<u8>, pages: usize) -> i32
     flusher.flush();
 
     unsafe {
-        deallocate_page_frames(
-            PhysPageFrame::new(PhysAddr::new(paddr)),
-            page_count,
-            &mut page_manager_lock_irqsave(),
-        );
+        deallocate_page_frames(PhysPageFrame::new(PhysAddr::new(paddr)), page_count);
     }
     return 0;
 }

+ 1 - 1
kernel/src/driver/tty/tty_device.rs

@@ -263,7 +263,7 @@ impl IndexNode for TtyDevice {
                 break;
             }
 
-            if pcb.sig_info_irqsave().sig_pending().has_pending() {
+            if pcb.has_pending_signal_fast() {
                 return Err(SystemError::ERESTARTSYS);
             }
         }

+ 8 - 4
kernel/src/driver/tty/tty_job_control.rs

@@ -4,7 +4,7 @@ use system_error::SystemError;
 use crate::{
     arch::ipc::signal::{SigSet, Signal},
     mm::VirtAddr,
-    process::{Pid, ProcessManager},
+    process::{Pid, ProcessFlags, ProcessManager},
     syscall::{
         user_access::{UserBufferReader, UserBufferWriter},
         Syscall,
@@ -51,9 +51,9 @@ impl TtyJobCtrlManager {
         if tty_pgid.is_some() && tty_pgid.unwrap() != pgid {
             if pcb
                 .sig_info_irqsave()
-                .sig_block()
+                .sig_blocked()
                 .contains(SigSet::from_bits_truncate(1 << sig as u64))
-                || pcb.sig_struct_irqsave().handlers[sig as usize].is_ignore()
+                || pcb.sig_struct_irqsave().handlers[sig as usize - 1].is_ignore()
             {
                 // 忽略该信号
                 if sig == Signal::SIGTTIN {
@@ -62,7 +62,11 @@ impl TtyJobCtrlManager {
             } else {
                 // 暂时使用kill而不是killpg
                 Syscall::kill(pgid, sig as i32)?;
-                return Err(SystemError::ERESTART);
+                ProcessManager::current_pcb()
+                    .flags()
+                    .insert(ProcessFlags::HAS_PENDING_SIGNAL);
+                log::debug!("job_ctrl_ioctl: kill. pgid: {pgid}, tty_pgid: {tty_pgid:?}");
+                return Err(SystemError::ERESTARTSYS);
             }
         }
 

+ 11 - 7
kernel/src/driver/tty/tty_ldisc/ntty.rs

@@ -21,7 +21,7 @@ use crate::{
     },
     mm::VirtAddr,
     net::event_poll::EPollEventType,
-    process::ProcessManager,
+    process::{ProcessFlags, ProcessManager},
     syscall::{user_access::UserBufferWriter, Syscall},
 };
 
@@ -1680,11 +1680,11 @@ impl TtyLineDiscipline for NTtyLinediscipline {
                     break;
                 }
 
-                if ProcessManager::current_pcb()
-                    .sig_info_irqsave()
-                    .sig_pending()
-                    .has_pending()
-                {
+                if ProcessManager::current_pcb().has_pending_signal_fast() {
+                    ProcessManager::current_pcb()
+                        .flags()
+                        .insert(ProcessFlags::HAS_PENDING_SIGNAL);
+
                     ret = Err(SystemError::ERESTARTSYS);
                     break;
                 }
@@ -1763,7 +1763,11 @@ impl TtyLineDiscipline for NTtyLinediscipline {
         // drop(ldata);
         let mut offset = 0;
         loop {
-            if pcb.sig_info_irqsave().sig_pending().has_pending() {
+            if pcb.has_pending_signal_fast() {
+                ProcessManager::current_pcb()
+                    .flags()
+                    .insert(ProcessFlags::HAS_PENDING_SIGNAL);
+
                 return Err(SystemError::ERESTARTSYS);
             }
             if core.flags().contains(TtyFlag::HUPPED) {

+ 2 - 6
kernel/src/driver/virtio/virtio_impl.rs

@@ -3,7 +3,7 @@ use crate::arch::mm::kernel_page_flags;
 use crate::arch::MMArch;
 
 use crate::mm::kernel_mapper::KernelMapper;
-use crate::mm::page::{page_manager_lock_irqsave, EntryFlags};
+use crate::mm::page::EntryFlags;
 use crate::mm::{
     allocator::page_frame::{
         allocate_page_frames, deallocate_page_frames, PageFrameCount, PhysPageFrame,
@@ -72,11 +72,7 @@ unsafe impl Hal for HalImpl {
         flusher.flush();
 
         unsafe {
-            deallocate_page_frames(
-                PhysPageFrame::new(PhysAddr::new(paddr)),
-                page_count,
-                &mut page_manager_lock_irqsave(),
-            );
+            deallocate_page_frames(PhysPageFrame::new(PhysAddr::new(paddr)), page_count);
         }
         return 0;
     }

+ 46 - 0
kernel/src/exception/entry.rs

@@ -0,0 +1,46 @@
+use crate::{
+    arch::{interrupt::TrapFrame, CurrentSignalArch},
+    ipc::signal_types::SignalArch,
+    process::{ProcessFlags, ProcessManager},
+};
+
+#[no_mangle]
+unsafe extern "C" fn irqentry_exit(frame: &mut TrapFrame) {
+    if frame.is_from_user() {
+        irqentry_exit_to_user_mode(frame);
+    }
+}
+
+/// 退出到用户态之前,在这个函数内做最后的处理
+///
+/// # Safety
+///
+/// 由于这个函数内可能会直接退出进程,因此,在进入函数之前,
+/// 必须保证所有的栈上的Arc/Box指针等,都已经被释放。否则,可能会导致内存泄漏。
+unsafe fn irqentry_exit_to_user_mode(frame: &mut TrapFrame) {
+    exit_to_user_mode_prepare(frame);
+}
+
+/// # Safety
+///
+/// 由于这个函数内可能会直接退出进程,因此,在进入函数之前,
+/// 必须保证所有的栈上的Arc/Box指针等,都已经被释放。否则,可能会导致内存泄漏。
+unsafe fn exit_to_user_mode_prepare(frame: &mut TrapFrame) {
+    let process_flags_work = *ProcessManager::current_pcb().flags();
+    if !process_flags_work.exit_to_user_mode_work().is_empty() {
+        exit_to_user_mode_loop(frame, process_flags_work);
+    }
+}
+
+/// # Safety
+///
+/// 由于这个函数内可能会直接退出进程,因此,在进入函数之前,
+/// 必须保证所有的栈上的Arc/Box指针等,都已经被释放。否则,可能会导致内存泄漏。
+unsafe fn exit_to_user_mode_loop(frame: &mut TrapFrame, mut process_flags_work: ProcessFlags) {
+    while !process_flags_work.exit_to_user_mode_work().is_empty() {
+        if process_flags_work.contains(ProcessFlags::HAS_PENDING_SIGNAL) {
+            unsafe { CurrentSignalArch::do_signal_or_restart(frame) };
+        }
+        process_flags_work = *ProcessManager::current_pcb().flags();
+    }
+}

+ 1 - 0
kernel/src/exception/mod.rs

@@ -7,6 +7,7 @@ use crate::arch::CurrentIrqArch;
 pub mod debug;
 pub mod dummychip;
 pub mod ebreak;
+pub mod entry;
 pub mod handle;
 pub mod init;
 pub mod ipi;

+ 39 - 13
kernel/src/filesystem/eventfd.rs

@@ -4,7 +4,7 @@ use crate::filesystem::vfs::{FilePrivateData, FileSystem, FileType, IndexNode, M
 use crate::libs::spinlock::{SpinLock, SpinLockGuard};
 use crate::libs::wait_queue::WaitQueue;
 use crate::net::event_poll::{EPollEventType, EPollItem, EventPoll, KernelIoctlData};
-use crate::process::ProcessManager;
+use crate::process::{ProcessFlags, ProcessManager};
 use crate::sched::SchedMode;
 use crate::syscall::Syscall;
 use alloc::collections::LinkedList;
@@ -82,6 +82,21 @@ impl EventFdInode {
         let count = self.eventfd.lock().count;
         return count > 0;
     }
+
+    fn do_poll(
+        &self,
+        _private_data: &FilePrivateData,
+        self_guard: &SpinLockGuard<'_, EventFd>,
+    ) -> Result<usize, SystemError> {
+        let mut events = EPollEventType::empty();
+        if self_guard.count != 0 {
+            events |= EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM;
+        }
+        if self_guard.count != u64::MAX {
+            events |= EPollEventType::EPOLLOUT | EPollEventType::EPOLLWRNORM;
+        }
+        return Ok(events.bits() as usize);
+    }
 }
 
 impl IndexNode for EventFdInode {
@@ -125,8 +140,17 @@ impl IndexNode for EventFdInode {
             }
 
             drop(lock_efd);
+
+            if ProcessManager::current_pcb().has_pending_signal_fast() {
+                return Err(SystemError::ERESTARTSYS);
+            }
+
             let r = wq_wait_event_interruptible!(self.wait_queue, self.readable(), {});
             if r.is_err() {
+                ProcessManager::current_pcb()
+                    .flags()
+                    .insert(ProcessFlags::HAS_PENDING_SIGNAL);
+
                 return Err(SystemError::ERESTARTSYS);
             }
 
@@ -134,7 +158,7 @@ impl IndexNode for EventFdInode {
         }
         let mut val = lock_efd.count;
 
-        let mut eventfd = self.eventfd.lock();
+        let mut eventfd = lock_efd;
         if eventfd.flags.contains(EventFdFlags::EFD_SEMAPHORE) {
             eventfd.count -= 1;
             val = 1;
@@ -143,8 +167,9 @@ impl IndexNode for EventFdInode {
         }
         let val_bytes = val.to_ne_bytes();
         buf[..8].copy_from_slice(&val_bytes);
+        let pollflag = EPollEventType::from_bits_truncate(self.do_poll(&data, &eventfd)? as u32);
+        drop(eventfd);
 
-        let pollflag = EPollEventType::from_bits_truncate(self.poll(&data)? as u32);
         // 唤醒epoll中等待的进程
         EventPoll::wakeup_epoll(&self.epitems, Some(pollflag))?;
 
@@ -174,6 +199,9 @@ impl IndexNode for EventFdInode {
             return Err(SystemError::EINVAL);
         }
         loop {
+            if ProcessManager::current_pcb().has_pending_signal() {
+                return Err(SystemError::ERESTARTSYS);
+            }
             let eventfd = self.eventfd.lock();
             if u64::MAX - eventfd.count > val {
                 break;
@@ -185,13 +213,17 @@ impl IndexNode for EventFdInode {
                 return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
             }
             drop(eventfd);
-            self.wait_queue.sleep();
+            self.wait_queue.sleep().ok();
         }
         let mut eventfd = self.eventfd.lock();
         eventfd.count += val;
+        drop(eventfd);
         self.wait_queue.wakeup_all(None);
 
-        let pollflag = EPollEventType::from_bits_truncate(self.poll(&data)? as u32);
+        let eventfd = self.eventfd.lock();
+        let pollflag = EPollEventType::from_bits_truncate(self.do_poll(&data, &eventfd)? as u32);
+        drop(eventfd);
+
         // 唤醒epoll中等待的进程
         EventPoll::wakeup_epoll(&self.epitems, Some(pollflag))?;
         return Ok(8);
@@ -202,14 +234,8 @@ impl IndexNode for EventFdInode {
     /// - 如果 counter 的值大于 0 ,那么 fd 的状态就是可读的
     /// - 如果能无阻塞地写入一个至少为 1 的值,那么 fd 的状态就是可写的
     fn poll(&self, _private_data: &FilePrivateData) -> Result<usize, SystemError> {
-        let mut events = EPollEventType::empty();
-        if self.eventfd.lock().count != 0 {
-            events |= EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM;
-        }
-        if self.eventfd.lock().count != u64::MAX {
-            events |= EPollEventType::EPOLLOUT | EPollEventType::EPOLLWRNORM;
-        }
-        return Ok(events.bits() as usize);
+        let self_guard = self.eventfd.lock();
+        self.do_poll(_private_data, &self_guard)
     }
 
     fn metadata(&self) -> Result<Metadata, SystemError> {

+ 98 - 29
kernel/src/filesystem/fat/fs.rs

@@ -14,7 +14,7 @@ use alloc::{
 
 use crate::driver::base::block::gendisk::GenDisk;
 use crate::driver::base::device::device_number::DeviceNumber;
-use crate::filesystem::vfs::file::PageCache;
+use crate::filesystem::page_cache::PageCache;
 use crate::filesystem::vfs::utils::DName;
 use crate::filesystem::vfs::{Magic, SpecialNodeData, SuperBlock};
 use crate::ipc::pipe::LockedPipeInode;
@@ -129,9 +129,8 @@ pub struct FATInode {
 }
 
 impl FATInode {
-    /// @brief 更新当前inode的元数据
-    pub fn update_metadata(&mut self) {
-        // todo: 更新文件的访问时间等信息
+    /// 将inode的元数据与磁盘同步
+    pub fn synchronize_metadata(&mut self) {
         match &self.inode_type {
             FATDirEntry::File(f) | FATDirEntry::VolId(f) => {
                 self.metadata.size = f.size() as i64;
@@ -146,6 +145,19 @@ impl FATInode {
         };
     }
 
+    /// 更新inode的元数据
+    pub fn update_metadata(&mut self, size: Option<i64>) {
+        if let Some(new_size) = size {
+            self.metadata.size = new_size;
+        }
+        self.update_time();
+    }
+
+    /// 更新访问时间
+    pub fn update_time(&mut self) {
+        // log::warn!("update_time has not yet been implemented");
+    }
+
     fn find(&mut self, name: &str) -> Result<Arc<LockedFATInode>, SystemError> {
         match &self.inode_type {
             FATDirEntry::Dir(d) => {
@@ -234,7 +246,7 @@ impl LockedFATInode {
 
         inode.0.lock().self_ref = Arc::downgrade(&inode);
 
-        inode.0.lock().update_metadata();
+        inode.0.lock().synchronize_metadata();
 
         return inode;
     }
@@ -1386,24 +1398,14 @@ impl FATFsInfo {
 }
 
 impl IndexNode for LockedFATInode {
-    fn read_at(
-        &self,
-        offset: usize,
-        len: usize,
-        buf: &mut [u8],
-        _data: SpinLockGuard<FilePrivateData>,
-    ) -> Result<usize, SystemError> {
-        let mut guard: SpinLockGuard<FATInode> = self.0.lock();
+    fn read_sync(&self, offset: usize, buf: &mut [u8]) -> Result<usize, SystemError> {
+        let guard: SpinLockGuard<FATInode> = self.0.lock();
         match &guard.inode_type {
             FATDirEntry::File(f) | FATDirEntry::VolId(f) => {
-                let r = f.read(
-                    &guard.fs.upgrade().unwrap(),
-                    &mut buf[0..len],
-                    offset as u64,
-                );
-                guard.update_metadata();
+                let r = f.read(&guard.fs.upgrade().unwrap(), buf, offset as u64);
                 return r;
             }
+
             FATDirEntry::Dir(_) => {
                 return Err(SystemError::EISDIR);
             }
@@ -1414,25 +1416,20 @@ impl IndexNode for LockedFATInode {
         }
     }
 
-    fn write_at(
-        &self,
-        offset: usize,
-        len: usize,
-        buf: &[u8],
-        _data: SpinLockGuard<FilePrivateData>,
-    ) -> Result<usize, SystemError> {
+    fn write_sync(&self, offset: usize, buf: &[u8]) -> Result<usize, SystemError> {
         let mut guard: SpinLockGuard<FATInode> = self.0.lock();
         let fs: &Arc<FATFileSystem> = &guard.fs.upgrade().unwrap();
 
         match &mut guard.inode_type {
             FATDirEntry::File(f) | FATDirEntry::VolId(f) => {
-                let r = f.write(fs, &buf[0..len], offset as u64);
-                guard.update_metadata();
+                let r = f.write(fs, buf, offset as u64);
                 return r;
             }
+
             FATDirEntry::Dir(_) => {
                 return Err(SystemError::EISDIR);
             }
+
             FATDirEntry::UnInit => {
                 error!("FATFS: param: Inode_type uninitialized.");
                 return Err(SystemError::EROFS);
@@ -1440,6 +1437,74 @@ impl IndexNode for LockedFATInode {
         }
     }
 
+    fn read_at(
+        &self,
+        offset: usize,
+        len: usize,
+        buf: &mut [u8],
+        data: SpinLockGuard<FilePrivateData>,
+    ) -> Result<usize, SystemError> {
+        let len = core::cmp::min(len, buf.len());
+        let buf = &mut buf[0..len];
+
+        let page_cache = self.0.lock().page_cache.clone();
+        if let Some(page_cache) = page_cache {
+            let r = page_cache.lock_irqsave().read(offset, &mut buf[0..len]);
+            // self.0.lock_irqsave().update_metadata();
+            return r;
+        } else {
+            return self.read_direct(offset, len, buf, data);
+        }
+    }
+
+    fn write_at(
+        &self,
+        offset: usize,
+        len: usize,
+        buf: &[u8],
+        data: SpinLockGuard<FilePrivateData>,
+    ) -> Result<usize, SystemError> {
+        let len = core::cmp::min(len, buf.len());
+        let buf = &buf[0..len];
+
+        let page_cache = self.0.lock().page_cache.clone();
+        if let Some(page_cache) = page_cache {
+            let write_len = page_cache.lock_irqsave().write(offset, buf)?;
+            let mut guard = self.0.lock();
+            let old_size = guard.metadata.size;
+            guard.update_metadata(Some(core::cmp::max(old_size, (offset + write_len) as i64)));
+            return Ok(write_len);
+        } else {
+            return self.write_direct(offset, len, buf, data);
+        }
+    }
+
+    fn read_direct(
+        &self,
+        offset: usize,
+        len: usize,
+        buf: &mut [u8],
+        _data: SpinLockGuard<FilePrivateData>,
+    ) -> Result<usize, SystemError> {
+        let len = core::cmp::min(len, buf.len());
+        let r = self.read_sync(offset, &mut buf[0..len]);
+        // self.0.lock_irqsave().update_metadata();
+        return r;
+    }
+
+    fn write_direct(
+        &self,
+        offset: usize,
+        len: usize,
+        buf: &[u8],
+        _data: SpinLockGuard<FilePrivateData>,
+    ) -> Result<usize, SystemError> {
+        let len = core::cmp::min(len, buf.len());
+        let r = self.write_sync(offset, &buf[0..len]);
+        // self.0.lock_irqsave().update_metadata();
+        return r;
+    }
+
     fn create(
         &self,
         name: &str,
@@ -1496,6 +1561,10 @@ impl IndexNode for LockedFATInode {
         Ok(())
     }
     fn resize(&self, len: usize) -> Result<(), SystemError> {
+        if let Some(page_cache) = self.page_cache() {
+            return page_cache.lock_irqsave().resize(len);
+        }
+
         let mut guard: SpinLockGuard<FATInode> = self.0.lock();
         let fs: &Arc<FATFileSystem> = &guard.fs.upgrade().unwrap();
         let old_size = guard.metadata.size as usize;
@@ -1527,7 +1596,7 @@ impl IndexNode for LockedFATInode {
                         file.truncate(fs, len as u64)?;
                     }
                 }
-                guard.update_metadata();
+                guard.synchronize_metadata();
                 return Ok(());
             }
             FATDirEntry::Dir(_) => return Err(SystemError::ENOSYS),

+ 1 - 0
kernel/src/filesystem/mod.rs

@@ -5,6 +5,7 @@ pub mod fat;
 pub mod kernfs;
 pub mod mbr;
 pub mod overlayfs;
+pub mod page_cache;
 pub mod procfs;
 pub mod ramfs;
 pub mod sysfs;

+ 346 - 0
kernel/src/filesystem/page_cache.rs

@@ -0,0 +1,346 @@
+use core::cmp::min;
+
+use alloc::{
+    sync::{Arc, Weak},
+    vec::Vec,
+};
+use hashbrown::HashMap;
+use system_error::SystemError;
+
+use super::vfs::IndexNode;
+use crate::libs::spinlock::SpinLockGuard;
+use crate::mm::page::FileMapInfo;
+use crate::{arch::mm::LockedFrameAllocator, libs::lazy_init::Lazy};
+use crate::{
+    arch::MMArch,
+    libs::spinlock::SpinLock,
+    mm::{
+        page::{page_manager_lock_irqsave, page_reclaimer_lock_irqsave, Page, PageFlags},
+        MemoryManagementArch,
+    },
+};
+use crate::{libs::align::page_align_up, mm::page::PageType};
+
+/// 页面缓存
+#[derive(Debug)]
+pub struct PageCache {
+    inner: SpinLock<InnerPageCache>,
+    inode: Lazy<Weak<dyn IndexNode>>,
+}
+
+#[derive(Debug)]
+pub struct InnerPageCache {
+    pages: HashMap<usize, Arc<Page>>,
+    page_cache_ref: Weak<PageCache>,
+}
+
+impl InnerPageCache {
+    pub fn new(page_cache_ref: Weak<PageCache>) -> InnerPageCache {
+        Self {
+            pages: HashMap::new(),
+            page_cache_ref,
+        }
+    }
+
+    pub fn add_page(&mut self, offset: usize, page: &Arc<Page>) {
+        self.pages.insert(offset, page.clone());
+    }
+
+    pub fn get_page(&self, offset: usize) -> Option<Arc<Page>> {
+        self.pages.get(&offset).cloned()
+    }
+
+    pub fn remove_page(&mut self, offset: usize) -> Option<Arc<Page>> {
+        self.pages.remove(&offset)
+    }
+
+    fn create_pages(&mut self, start_page_index: usize, buf: &[u8]) -> Result<(), SystemError> {
+        assert!(buf.len() % MMArch::PAGE_SIZE == 0);
+
+        let page_num = buf.len() / MMArch::PAGE_SIZE;
+
+        let len = buf.len();
+        if len == 0 {
+            return Ok(());
+        }
+
+        let mut page_manager_guard = page_manager_lock_irqsave();
+
+        for i in 0..page_num {
+            let buf_offset = i * MMArch::PAGE_SIZE;
+            let page_index = start_page_index + i;
+
+            let page = page_manager_guard.create_one_page(
+                PageType::File(FileMapInfo {
+                    page_cache: self
+                        .page_cache_ref
+                        .upgrade()
+                        .expect("failed to get self_arc of pagecache"),
+                    index: page_index,
+                }),
+                PageFlags::PG_LRU,
+                &mut LockedFrameAllocator,
+            )?;
+
+            let mut page_guard = page.write_irqsave();
+            unsafe {
+                page_guard.copy_from_slice(&buf[buf_offset..buf_offset + MMArch::PAGE_SIZE]);
+            }
+
+            self.add_page(page_index, &page);
+        }
+
+        Ok(())
+    }
+
+    /// 从PageCache中读取数据。
+    ///
+    /// ## 参数
+    ///
+    /// - `offset` 偏移量
+    /// - `buf` 缓冲区
+    ///
+    /// ## 返回值
+    ///
+    /// - `Ok(usize)` 成功读取的长度
+    /// - `Err(SystemError)` 失败返回错误码
+    pub fn read(&mut self, offset: usize, buf: &mut [u8]) -> Result<usize, SystemError> {
+        let inode = self
+            .page_cache_ref
+            .upgrade()
+            .unwrap()
+            .inode
+            .upgrade()
+            .unwrap();
+        let file_size = inode.metadata().unwrap().size;
+
+        let len = if offset < file_size as usize {
+            core::cmp::min(file_size as usize, offset + buf.len()) - offset
+        } else {
+            0
+        };
+
+        if len == 0 {
+            return Ok(0);
+        }
+
+        let mut not_exist = Vec::new();
+
+        let start_page_index = offset >> MMArch::PAGE_SHIFT;
+        let page_num = (page_align_up(offset + len) >> MMArch::PAGE_SHIFT) - start_page_index;
+
+        let mut buf_offset = 0;
+        let mut ret = 0;
+        for i in 0..page_num {
+            let page_index = start_page_index + i;
+
+            // 第一个页可能需要计算页内偏移
+            let page_offset = if i == 0 {
+                offset % MMArch::PAGE_SIZE
+            } else {
+                0
+            };
+
+            // 第一个页和最后一个页可能不满
+            let sub_len = if i == 0 {
+                min(len, MMArch::PAGE_SIZE - page_offset)
+            } else if i == page_num - 1 {
+                (offset + len - 1) % MMArch::PAGE_SIZE + 1
+            } else {
+                MMArch::PAGE_SIZE
+            };
+
+            if let Some(page) = self.get_page(page_index) {
+                let sub_buf = &mut buf[buf_offset..(buf_offset + sub_len)];
+                unsafe {
+                    sub_buf.copy_from_slice(
+                        &page.read_irqsave().as_slice()[page_offset..page_offset + sub_len],
+                    );
+                }
+                ret += sub_len;
+            } else if let Some((index, count)) = not_exist.last_mut() {
+                if *index + *count == page_index {
+                    *count += 1;
+                } else {
+                    not_exist.push((page_index, 1));
+                }
+            } else {
+                not_exist.push((page_index, 1));
+            }
+
+            buf_offset += sub_len;
+        }
+
+        for (page_index, count) in not_exist {
+            // TODO 这里使用buffer避免多次读取磁盘,将来引入异步IO直接写入页面,减少内存开销和拷贝
+            let mut page_buf = vec![0u8; MMArch::PAGE_SIZE * count];
+            inode.read_sync(page_index * MMArch::PAGE_SIZE, page_buf.as_mut())?;
+
+            self.create_pages(page_index, page_buf.as_mut())?;
+
+            // 实际要拷贝的内容在文件中的偏移量
+            let copy_offset = core::cmp::max(page_index * MMArch::PAGE_SIZE, offset);
+            // 实际要拷贝的内容的长度
+            let copy_len = core::cmp::min((page_index + count) * MMArch::PAGE_SIZE, offset + len)
+                - copy_offset;
+
+            let page_buf_offset = if page_index * MMArch::PAGE_SIZE < copy_offset {
+                copy_offset - page_index * MMArch::PAGE_SIZE
+            } else {
+                0
+            };
+
+            let buf_offset = copy_offset.saturating_sub(offset);
+
+            buf[buf_offset..buf_offset + copy_len]
+                .copy_from_slice(&page_buf[page_buf_offset..page_buf_offset + copy_len]);
+
+            ret += copy_len;
+
+            // log::debug!("page_offset:{page_offset}, count:{count}");
+            // log::debug!("copy_offset:{copy_offset}, copy_len:{copy_len}");
+            // log::debug!("buf_offset:{buf_offset}, page_buf_offset:{page_buf_offset}");
+        }
+
+        Ok(ret)
+    }
+
+    /// 向PageCache中写入数据。
+    ///
+    /// ## 参数
+    ///
+    /// - `offset` 偏移量
+    /// - `buf` 缓冲区
+    ///
+    /// ## 返回值
+    ///
+    /// - `Ok(usize)` 成功读取的长度
+    /// - `Err(SystemError)` 失败返回错误码
+    pub fn write(&mut self, offset: usize, buf: &[u8]) -> Result<usize, SystemError> {
+        let len = buf.len();
+        if len == 0 {
+            return Ok(0);
+        }
+
+        // log::debug!("offset:{offset}, len:{len}");
+
+        let start_page_index = offset >> MMArch::PAGE_SHIFT;
+        let page_num = (page_align_up(offset + len) >> MMArch::PAGE_SHIFT) - start_page_index;
+
+        let mut buf_offset = 0;
+        let mut ret = 0;
+
+        for i in 0..page_num {
+            let page_index = start_page_index + i;
+
+            // 第一个页可能需要计算页内偏移
+            let page_offset = if i == 0 {
+                offset % MMArch::PAGE_SIZE
+            } else {
+                0
+            };
+
+            // 第一个页和最后一个页可能不满
+            let sub_len = if i == 0 {
+                min(len, MMArch::PAGE_SIZE - page_offset)
+            } else if i == page_num - 1 {
+                (offset + len - 1) % MMArch::PAGE_SIZE + 1
+            } else {
+                MMArch::PAGE_SIZE
+            };
+
+            let mut page = self.get_page(page_index);
+
+            if page.is_none() {
+                let page_buf = vec![0u8; MMArch::PAGE_SIZE];
+                self.create_pages(page_index, &page_buf)?;
+                page = self.get_page(page_index);
+            }
+
+            if let Some(page) = page {
+                let sub_buf = &buf[buf_offset..(buf_offset + sub_len)];
+                let mut page_guard = page.write_irqsave();
+                unsafe {
+                    page_guard.as_slice_mut()[page_offset..page_offset + sub_len]
+                        .copy_from_slice(sub_buf);
+                }
+                page_guard.add_flags(PageFlags::PG_DIRTY);
+
+                ret += sub_len;
+
+                // log::debug!(
+                //     "page_offset:{page_offset}, buf_offset:{buf_offset}, sub_len:{sub_len}"
+                // );
+            } else {
+                return Err(SystemError::EIO);
+            };
+
+            buf_offset += sub_len;
+        }
+        Ok(ret)
+    }
+
+    pub fn resize(&mut self, len: usize) -> Result<(), SystemError> {
+        let page_num = page_align_up(len) / MMArch::PAGE_SIZE;
+
+        let mut reclaimer = page_reclaimer_lock_irqsave();
+        for (_i, page) in self.pages.drain_filter(|index, _page| *index >= page_num) {
+            let _ = reclaimer.remove_page(&page.phys_address());
+        }
+
+        if page_num > 0 {
+            let last_page_index = page_num - 1;
+            let last_len = len - last_page_index * MMArch::PAGE_SIZE;
+            if let Some(page) = self.get_page(last_page_index) {
+                unsafe {
+                    page.write_irqsave().truncate(last_len);
+                };
+            } else {
+                return Err(SystemError::EIO);
+            }
+        }
+
+        Ok(())
+    }
+}
+
+impl Drop for InnerPageCache {
+    fn drop(&mut self) {
+        log::debug!("page cache drop");
+        let mut page_manager = page_manager_lock_irqsave();
+        for page in self.pages.values() {
+            page_manager.remove_page(&page.phys_address());
+        }
+    }
+}
+
+impl PageCache {
+    pub fn new(inode: Option<Weak<dyn IndexNode>>) -> Arc<PageCache> {
+        Arc::new_cyclic(|weak| Self {
+            inner: SpinLock::new(InnerPageCache::new(weak.clone())),
+            inode: {
+                let v: Lazy<Weak<dyn IndexNode>> = Lazy::new();
+                if let Some(inode) = inode {
+                    v.init(inode);
+                }
+                v
+            },
+        })
+    }
+
+    pub fn inode(&self) -> Option<Weak<dyn IndexNode>> {
+        self.inode.try_get().cloned()
+    }
+
+    pub fn set_inode(&self, inode: Weak<dyn IndexNode>) -> Result<(), SystemError> {
+        if self.inode.initialized() {
+            return Err(SystemError::EINVAL);
+        }
+        self.inode.init(inode);
+        Ok(())
+    }
+
+    pub fn lock_irqsave(&self) -> SpinLockGuard<InnerPageCache> {
+        self.inner.lock_irqsave()
+    }
+}

+ 26 - 1
kernel/src/filesystem/procfs/mod.rs

@@ -394,7 +394,31 @@ impl ProcFS {
         } else {
             panic!("create ksmg error");
         }
-
+        // 这个文件是用来欺骗Aya框架识别内核版本
+        /* On Ubuntu LINUX_VERSION_CODE doesn't correspond to info.release,
+         * but Ubuntu provides /proc/version_signature file, as described at
+         * https://ubuntu.com/kernel, with an example contents below, which we
+         * can use to get a proper LINUX_VERSION_CODE.
+         *
+         *   Ubuntu 5.4.0-12.15-generic 5.4.8
+         *
+         * In the above, 5.4.8 is what kernel is actually expecting, while
+         * uname() call will return 5.4.0 in info.release.
+         */
+        let binding = inode.create("version_signature", FileType::File, ModeType::S_IRUGO);
+        if let Ok(version_signature) = binding {
+            let version_signature = version_signature
+                .as_any_ref()
+                .downcast_ref::<LockedProcFSInode>()
+                .unwrap();
+            version_signature.0.lock().fdata.ftype = ProcFileType::Default;
+            version_signature.0.lock().data = "DragonOS 6.0.0-generic 6.0.0\n"
+                .to_string()
+                .as_bytes()
+                .to_vec();
+        } else {
+            panic!("create version_signature error");
+        }
         return result;
     }
 
@@ -466,6 +490,7 @@ impl IndexNode for LockedProcFSInode {
         let file_size = match inode.fdata.ftype {
             ProcFileType::ProcStatus => inode.open_status(&mut private_data)?,
             ProcFileType::ProcMeminfo => inode.open_meminfo(&mut private_data)?,
+            ProcFileType::Default => inode.data.len() as i64,
             _ => {
                 todo!()
             }

+ 25 - 108
kernel/src/filesystem/vfs/file.rs

@@ -5,16 +5,13 @@ use alloc::{
     sync::{Arc, Weak},
     vec::Vec,
 };
-use kdepends::xarray::XArray;
 use log::error;
 use system_error::SystemError;
 
 use super::{Dirent, FileType, IndexNode, InodeId, Metadata, SpecialNodeData};
 use crate::filesystem::eventfd::EventFdInode;
-use crate::libs::lazy_init::Lazy;
 use crate::perf::PerfEventInode;
 use crate::{
-    arch::MMArch,
     driver::{
         base::{block::SeekFrom, device::DevicePrivateData},
         tty::tty_device::TtyFilePrivateData,
@@ -22,7 +19,6 @@ use crate::{
     filesystem::procfs::ProcfsFilePrivateData,
     ipc::pipe::{LockedPipeInode, PipeFsPrivateData},
     libs::{rwlock::RwLock, spinlock::SpinLock},
-    mm::{page::Page, MemoryManagementArch},
     net::{
         event_poll::{EPollItem, EPollPrivateData, EventPoll},
         socket::Inode as SocketInode,
@@ -124,75 +120,6 @@ impl FileMode {
     }
 }
 
-/// 页面缓存
-pub struct PageCache {
-    xarray: SpinLock<XArray<Arc<Page>>>,
-    inode: Lazy<Weak<dyn IndexNode>>,
-}
-
-impl core::fmt::Debug for PageCache {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        f.debug_struct("PageCache")
-            .field(
-                "xarray",
-                &self
-                    .xarray
-                    .lock()
-                    .range(0..((MMArch::PAGE_ADDRESS_SIZE >> MMArch::PAGE_SHIFT) as u64))
-                    .map(|(_, r)| (*r).clone())
-                    .collect::<Vec<Arc<Page>>>(),
-            )
-            .finish()
-    }
-}
-
-impl PageCache {
-    pub fn new(inode: Option<Weak<dyn IndexNode>>) -> Arc<PageCache> {
-        let page_cache = Self {
-            xarray: SpinLock::new(XArray::new()),
-            inode: {
-                let v: Lazy<Weak<dyn IndexNode>> = Lazy::new();
-                if let Some(inode) = inode {
-                    v.init(inode);
-                }
-                v
-            },
-        };
-        Arc::new(page_cache)
-    }
-
-    pub fn inode(&self) -> Option<Weak<dyn IndexNode>> {
-        self.inode.try_get().cloned()
-    }
-
-    pub fn add_page(&self, offset: usize, page: &Arc<Page>) {
-        let mut guard = self.xarray.lock();
-        let mut cursor = guard.cursor_mut(offset as u64);
-        cursor.store(page.clone());
-    }
-
-    pub fn get_page(&self, offset: usize) -> Option<Arc<Page>> {
-        let mut guard = self.xarray.lock();
-        let mut cursor = guard.cursor_mut(offset as u64);
-        let page = cursor.load().map(|r| (*r).clone());
-        page
-    }
-
-    pub fn remove_page(&self, offset: usize) {
-        let mut guard = self.xarray.lock();
-        let mut cursor = guard.cursor_mut(offset as u64);
-        cursor.remove();
-    }
-
-    pub fn set_inode(&self, inode: Weak<dyn IndexNode>) -> Result<(), SystemError> {
-        if self.inode.initialized() {
-            return Err(SystemError::EINVAL);
-        }
-        self.inode.init(inode);
-        Ok(())
-    }
-}
-
 /// @brief 抽象文件结构体
 #[derive(Debug)]
 pub struct File {
@@ -238,13 +165,16 @@ impl File {
         return Ok(f);
     }
 
-    /// @brief 从文件中读取指定的字节数到buffer中
+    /// ## 从文件中读取指定的字节数到buffer中
     ///
-    /// @param len 要读取的字节数
-    /// @param buf 目标buffer
+    /// ### 参数
+    /// - `len`: 要读取的字节数
+    /// - `buf`: 缓冲区
+    /// - `read_direct`: 忽略缓存,直接读取磁盘
     ///
-    /// @return Ok(usize) 成功读取的字节数
-    /// @return Err(SystemError) 错误码
+    /// ### 返回值
+    /// - `Ok(usize)`: 成功读取的字节数
+    /// - `Err(SystemError)`: 错误码
     pub fn read(&self, len: usize, buf: &mut [u8]) -> Result<usize, SystemError> {
         self.do_read(
             self.offset.load(core::sync::atomic::Ordering::SeqCst),
@@ -254,13 +184,16 @@ impl File {
         )
     }
 
-    /// @brief 从buffer向文件写入指定的字节数的数据
+    /// ## 从buffer向文件写入指定的字节数的数据
     ///
-    /// @param len 要写入的字节数
-    /// @param buf 源数据buffer
+    /// ### 参数
+    /// - `len`: 要写入的字节数
+    /// - `buf`: 写入缓冲区
     ///
-    /// @return Ok(usize) 成功写入的字节数
-    /// @return Err(SystemError) 错误码
+    /// ### 返回值
+    /// - `Ok(usize)`: 成功写入的字节数
+    /// - `Err(SystemError)`: 错误码
     pub fn write(&self, len: usize, buf: &[u8]) -> Result<usize, SystemError> {
         self.do_write(
             self.offset.load(core::sync::atomic::Ordering::SeqCst),
@@ -309,16 +242,13 @@ impl File {
             return Err(SystemError::ENOBUFS);
         }
 
-        let len = self
-            .inode
-            .read_at(offset, len, buf, self.private_data.lock())
-            .map_err(|e| {
-                if e == SystemError::ERESTARTSYS {
-                    SystemError::EINTR
-                } else {
-                    e
-                }
-            })?;
+        let len = if self.mode().contains(FileMode::O_DIRECT) {
+            self.inode
+                .read_direct(offset, len, buf, self.private_data.lock())
+        } else {
+            self.inode
+                .read_at(offset, len, buf, self.private_data.lock())
+        }?;
 
         if update_offset {
             self.offset
@@ -343,24 +273,11 @@ impl File {
 
         // 如果文件指针已经超过了文件大小,则需要扩展文件大小
         if offset > self.inode.metadata()?.size as usize {
-            self.inode.resize(offset).map_err(|e| {
-                if e == SystemError::ERESTARTSYS {
-                    SystemError::EINTR
-                } else {
-                    e
-                }
-            })?;
+            self.inode.resize(offset)?;
         }
         let len = self
             .inode
-            .write_at(offset, len, buf, self.private_data.lock())
-            .map_err(|e| {
-                if e == SystemError::ERESTARTSYS {
-                    SystemError::EINTR
-                } else {
-                    e
-                }
-            })?;
+            .write_at(offset, len, buf, self.private_data.lock())?;
 
         if update_offset {
             self.offset

+ 58 - 6
kernel/src/filesystem/vfs/mod.rs

@@ -24,14 +24,11 @@ use crate::{
     time::PosixTimeSpec,
 };
 
-use self::{
-    core::generate_inode_id,
-    file::{FileMode, PageCache},
-    syscall::ModeType,
-    utils::DName,
-};
+use self::{core::generate_inode_id, file::FileMode, syscall::ModeType, utils::DName};
 pub use self::{core::ROOT_INODE, file::FilePrivateData, mount::MountFS};
 
+use super::page_cache::PageCache;
+
 /// vfs容许的最大的路径名称长度
 pub const MAX_PATHLEN: usize = 1024;
 
@@ -128,6 +125,15 @@ pub trait IndexNode: Any + Sync + Send + Debug + CastFromSync {
     fn mmap(&self, _start: usize, _len: usize, _offset: usize) -> Result<(), SystemError> {
         return Err(SystemError::ENOSYS);
     }
+
+    fn read_sync(&self, _offset: usize, _buf: &mut [u8]) -> Result<usize, SystemError> {
+        return Err(SystemError::ENOSYS);
+    }
+
+    fn write_sync(&self, _offset: usize, _buf: &[u8]) -> Result<usize, SystemError> {
+        return Err(SystemError::ENOSYS);
+    }
+
     /// @brief 打开文件
     ///
     /// @return 成功:Ok()
@@ -184,6 +190,52 @@ pub trait IndexNode: Any + Sync + Send + Debug + CastFromSync {
         _data: SpinLockGuard<FilePrivateData>,
     ) -> Result<usize, SystemError>;
 
+    /// # 在inode的指定偏移量开始,读取指定大小的数据,忽略PageCache
+    ///
+    /// ## 参数
+    ///
+    /// - `offset`: 起始位置在Inode中的偏移量
+    /// - `len`: 要读取的字节数
+    /// - `buf`: 缓冲区
+    /// - `data`: 各文件系统所需私有信息
+    ///
+    /// ## 返回值
+    ///
+    /// - `Ok(usize)`: Ok(读取的字节数)
+    /// - `Err(SystemError)`: Err(Posix错误码)
+    fn read_direct(
+        &self,
+        _offset: usize,
+        _len: usize,
+        _buf: &mut [u8],
+        _data: SpinLockGuard<FilePrivateData>,
+    ) -> Result<usize, SystemError> {
+        return Err(SystemError::ENOSYS);
+    }
+
+    /// # 在inode的指定偏移量开始,写入指定大小的数据,忽略PageCache
+    ///
+    /// ## 参数
+    ///
+    /// - `offset`: 起始位置在Inode中的偏移量
+    /// - `len`: 要写入的字节数
+    /// - `buf`: 缓冲区
+    /// - `data`: 各文件系统所需私有信息
+    ///
+    /// ## 返回值
+    ///
+    /// - `Ok(usize)`: Ok(写入的字节数)
+    /// - `Err(SystemError)`: Err(Posix错误码)
+    fn write_direct(
+        &self,
+        _offset: usize,
+        _len: usize,
+        _buf: &[u8],
+        _data: SpinLockGuard<FilePrivateData>,
+    ) -> Result<usize, SystemError> {
+        return Err(SystemError::ENOSYS);
+    }
+
     /// @brief 获取当前inode的状态。
     ///
     /// @return PollStatus结构体

+ 23 - 5
kernel/src/filesystem/vfs/mount.rs

@@ -14,7 +14,7 @@ use system_error::SystemError;
 
 use crate::{
     driver::base::device::device_number::DeviceNumber,
-    filesystem::vfs::ROOT_INODE,
+    filesystem::{page_cache::PageCache, vfs::ROOT_INODE},
     libs::{
         casting::DowncastArc,
         rwlock::RwLock,
@@ -24,10 +24,8 @@ use crate::{
 };
 
 use super::{
-    file::{FileMode, PageCache},
-    syscall::ModeType,
-    utils::DName,
-    FilePrivateData, FileSystem, FileType, IndexNode, InodeId, Magic, SuperBlock,
+    file::FileMode, syscall::ModeType, utils::DName, FilePrivateData, FileSystem, FileType,
+    IndexNode, InodeId, Magic, SuperBlock,
 };
 
 const MOUNTFS_BLOCK_SIZE: u64 = 512;
@@ -296,6 +294,26 @@ impl IndexNode for MountFSInode {
         return self.inner_inode.write_at(offset, len, buf, data);
     }
 
+    fn read_direct(
+        &self,
+        offset: usize,
+        len: usize,
+        buf: &mut [u8],
+        data: SpinLockGuard<FilePrivateData>,
+    ) -> Result<usize, SystemError> {
+        self.inner_inode.read_direct(offset, len, buf, data)
+    }
+
+    fn write_direct(
+        &self,
+        offset: usize,
+        len: usize,
+        buf: &[u8],
+        data: SpinLockGuard<FilePrivateData>,
+    ) -> Result<usize, SystemError> {
+        self.inner_inode.write_direct(offset, len, buf, data)
+    }
+
     #[inline]
     fn fs(&self) -> Arc<dyn FileSystem> {
         return self.mount_fs.clone();

+ 16 - 0
kernel/src/filesystem/vfs/syscall.rs

@@ -739,6 +739,22 @@ impl Syscall {
         }
     }
 
+    pub fn fchdir(fd: i32) -> Result<usize, SystemError> {
+        let pcb = ProcessManager::current_pcb();
+        let file = pcb
+            .fd_table()
+            .read()
+            .get_file_by_fd(fd)
+            .ok_or(SystemError::EBADF)?;
+        let inode = file.inode();
+        if inode.metadata()?.file_type != FileType::Dir {
+            return Err(SystemError::ENOTDIR);
+        }
+        let path = inode.absolute_path()?;
+        pcb.basic_mut().set_cwd(path);
+        return Ok(0);
+    }
+
     /// @brief 获取当前进程的工作目录路径
     ///
     /// @param buf 指向缓冲区的指针

+ 4 - 1
kernel/src/ipc/pipe.rs

@@ -11,7 +11,7 @@ use crate::{
         wait_queue::WaitQueue,
     },
     net::event_poll::{EPollEventType, EPollItem, EventPoll},
-    process::{ProcessManager, ProcessState},
+    process::{ProcessFlags, ProcessManager, ProcessState},
     sched::SchedMode,
     time::PosixTimeSpec,
 };
@@ -232,6 +232,9 @@ impl IndexNode for LockedPipeInode {
             drop(inode);
             let r = wq_wait_event_interruptible!(self.read_wait_queue, self.readable(), {});
             if r.is_err() {
+                ProcessManager::current_pcb()
+                    .flags()
+                    .insert(ProcessFlags::HAS_PENDING_SIGNAL);
                 return Err(SystemError::ERESTARTSYS);
             }
 

+ 27 - 35
kernel/src/ipc/shm.rs

@@ -7,16 +7,15 @@ use crate::{
     },
     mm::{
         allocator::page_frame::{FrameAllocator, PageFrameCount, PhysPageFrame},
-        page::{page_manager_lock_irqsave, Page},
+        page::{page_manager_lock_irqsave, PageFlags, PageType},
         PhysAddr,
     },
     process::{Pid, ProcessManager},
     syscall::user_access::{UserBufferReader, UserBufferWriter},
     time::PosixTimeSpec,
 };
-use alloc::{sync::Arc, vec::Vec};
 use core::sync::atomic::{compiler_fence, Ordering};
-use hashbrown::{HashMap, HashSet};
+use hashbrown::HashMap;
 use ida::IdAllocator;
 use log::info;
 use num::ToPrimitive;
@@ -159,21 +158,16 @@ impl ShmManager {
 
         // 分配共享内存页面
         let page_count = PageFrameCount::from_bytes(page_align_up(size)).unwrap();
-        let phys_page =
-            unsafe { LockedFrameAllocator.allocate(page_count) }.ok_or(SystemError::EINVAL)?;
         // 创建共享内存page,并添加到PAGE_MANAGER中
         let mut page_manager_guard = page_manager_lock_irqsave();
-        let mut cur_phys = PhysPageFrame::new(phys_page.0);
-        for _ in 0..page_count.data() {
-            let page = Arc::new(Page::new(true, cur_phys.phys_address()));
-            page.write_irqsave().set_shm_id(shm_id);
-            let paddr = cur_phys.phys_address();
-            page_manager_guard.insert(paddr, &page);
-            cur_phys = cur_phys.next();
-        }
+        let (paddr, _page) = page_manager_guard.create_pages(
+            PageType::Shm(shm_id),
+            PageFlags::PG_UNEVICTABLE,
+            &mut LockedFrameAllocator,
+            page_count,
+        )?;
 
         // 创建共享内存信息结构体
-        let paddr = phys_page.0;
         let kern_ipc_perm = KernIpcPerm {
             id: shm_id,
             key,
@@ -323,9 +317,10 @@ impl ShmManager {
         let mut page_manager_guard = page_manager_lock_irqsave();
         if map_count > 0 {
             // 设置共享内存物理页当映射计数等于0时可被回收
+            // TODO 后续需要加入到lru中
             for _ in 0..count.data() {
                 let page = page_manager_guard.get_unwrap(&cur_phys.phys_address());
-                page.write_irqsave().set_dealloc_when_zero(true);
+                page.write_irqsave().remove_flags(PageFlags::PG_UNEVICTABLE);
 
                 cur_phys = cur_phys.next();
             }
@@ -375,6 +370,8 @@ pub struct KernelShm {
     shm_start_paddr: PhysAddr,
     /// 共享内存大小(bytes),注意是用户指定的大小(未经过页面对齐)
     shm_size: usize,
+    /// 映射计数
+    map_count: usize,
     /// 最后一次连接的时间
     shm_atim: PosixTimeSpec,
     /// 最后一次断开连接的时间
@@ -394,6 +391,7 @@ impl KernelShm {
             kern_ipc_perm,
             shm_start_paddr,
             shm_size,
+            map_count: 0,
             shm_atim: PosixTimeSpec::new(0, 0),
             shm_dtim: PosixTimeSpec::new(0, 0),
             shm_ctim: PosixTimeSpec::now(),
@@ -436,26 +434,7 @@ impl KernelShm {
 
     /// 共享内存段的映射计数(有多少个不同的VMA映射)
     pub fn map_count(&self) -> usize {
-        let mut page_manager_guard = page_manager_lock_irqsave();
-        let mut id_set: HashSet<usize> = HashSet::new();
-        let mut cur_phys = PhysPageFrame::new(self.shm_start_paddr);
-        let page_count = PageFrameCount::from_bytes(page_align_up(self.shm_size)).unwrap();
-
-        for _ in 0..page_count.data() {
-            let page = page_manager_guard.get(&cur_phys.phys_address()).unwrap();
-            id_set.extend(
-                page.read_irqsave()
-                    .anon_vma()
-                    .iter()
-                    .map(|vma| vma.id())
-                    .collect::<Vec<_>>(),
-            );
-
-            cur_phys = cur_phys.next();
-        }
-
-        // 由于LockedVMA的id是独一无二的,因此有多少个不同的id,就代表着有多少个不同的VMA映射到共享内存段
-        return id_set.len();
+        self.map_count
     }
 
     pub fn copy_from(&mut self, shm_id_ds: PosixShmIdDs) {
@@ -474,6 +453,19 @@ impl KernelShm {
 
         self.update_ctim();
     }
+
+    pub fn mode(&self) -> &ShmFlags {
+        &self.kern_ipc_perm.mode
+    }
+
+    pub fn increase_count(&mut self) {
+        self.map_count += 1;
+    }
+
+    pub fn decrease_count(&mut self) {
+        assert!(self.map_count > 0, "map_count is zero");
+        self.map_count -= 1;
+    }
 }
 
 /// 共享内存权限信息

+ 202 - 30
kernel/src/ipc/signal.rs

@@ -8,7 +8,9 @@ use crate::{
     arch::ipc::signal::{SigCode, SigFlags, SigSet, Signal},
     ipc::signal_types::SigactionType,
     libs::spinlock::SpinLockGuard,
-    process::{pid::PidType, Pid, ProcessControlBlock, ProcessFlags, ProcessManager},
+    process::{
+        pid::PidType, Pid, ProcessControlBlock, ProcessFlags, ProcessManager, ProcessSignalInfo,
+    },
 };
 
 use super::signal_types::{
@@ -25,7 +27,7 @@ impl Signal {
             return false;
         }
 
-        if !pcb.has_pending_signal() {
+        if !pcb.has_pending_signal_fast() {
             return false;
         }
 
@@ -112,7 +114,7 @@ impl Signal {
         }
 
         if !self.prepare_sianal(pcb.clone(), force_send) {
-            return Err(SystemError::EINVAL);
+            return Ok(0);
         }
         // debug!("force send={}", force_send);
         let pcb_info = pcb.sig_info_irqsave();
@@ -213,13 +215,18 @@ impl Signal {
         }
     }
 
-    /// @brief 本函数用于检测指定的进程是否想要接收SIG这个信号。
+    /// 本函数用于检测指定的进程是否想要接收SIG这个信号。
+    ///
     /// 当我们对于进程组中的所有进程都运行了这个检查之后,我们将可以找到组内愿意接收信号的进程。
     /// 这么做是为了防止我们把信号发送给了一个正在或已经退出的进程,或者是不响应该信号的进程。
     #[inline]
     fn wants_signal(&self, pcb: Arc<ProcessControlBlock>) -> bool {
        // 如果该进程屏蔽了这个signal,则不能接收
-        if pcb.sig_info_irqsave().sig_block().contains((*self).into()) {
+        if pcb
+            .sig_info_irqsave()
+            .sig_blocked()
+            .contains((*self).into())
+        {
             return false;
         }
 
@@ -291,7 +298,7 @@ impl Signal {
         // 一个被阻塞了的信号肯定是要被处理的
         if pcb
             .sig_info_irqsave()
-            .sig_block()
+            .sig_blocked()
             .contains(self.into_sigset())
         {
             return true;
@@ -316,6 +323,7 @@ fn signal_wake_up(pcb: Arc<ProcessControlBlock>, _guard: SpinLockGuard<SignalStr
     // debug!("signal_wake_up");
     // 如果目标进程已经在运行,则发起一个ipi,使得它陷入内核
     let state = pcb.sched_info().inner_lock_read_irqsave().state();
+    pcb.flags().insert(ProcessFlags::HAS_PENDING_SIGNAL);
     let mut wakeup_ok = true;
     if state.is_blocked_interruptable() {
         ProcessManager::wakeup(&pcb).unwrap_or_else(|e| {
@@ -350,16 +358,67 @@ fn signal_wake_up(pcb: Arc<ProcessControlBlock>, _guard: SpinLockGuard<SignalStr
     }
 }
 
-/// @brief 当一个进程具有多个线程之后,在这里需要重新计算线程的flag中的TIF_SIGPENDING位
-fn recalc_sigpending() {
-    // todo:
+fn has_pending_signals(sigset: &SigSet, blocked: &SigSet) -> bool {
+    sigset.bits() & (!blocked.bits()) != 0
+}
+
+impl ProcessControlBlock {
+    /// 重新计算线程的flag中的TIF_SIGPENDING位
+    /// 参考: https://code.dragonos.org.cn/xref/linux-6.1.9/kernel/signal.c?r=&mo=4806&fi=182#182
+    pub fn recalc_sigpending(&self, siginfo_guard: Option<&ProcessSignalInfo>) {
+        if !self.recalc_sigpending_tsk(siginfo_guard) {
+            self.flags().remove(ProcessFlags::HAS_PENDING_SIGNAL);
+        }
+    }
+
+    fn recalc_sigpending_tsk(&self, siginfo_guard: Option<&ProcessSignalInfo>) -> bool {
+        let mut _siginfo_tmp_guard = None;
+        let siginfo = if let Some(siginfo_guard) = siginfo_guard {
+            siginfo_guard
+        } else {
+            _siginfo_tmp_guard = Some(self.sig_info_irqsave());
+            _siginfo_tmp_guard.as_ref().unwrap()
+        };
+        return siginfo.do_recalc_sigpending_tsk(self);
+    }
 }
 
-/// @brief 刷新指定进程的sighand的sigaction,将满足条件的sigaction恢复为Default
-///     除非某个信号被设置为ignore且force_default为false,否则都不会将其恢复
+impl ProcessSignalInfo {
+    fn do_recalc_sigpending_tsk(&self, pcb: &ProcessControlBlock) -> bool {
+        if has_pending_signals(&self.sig_pending().signal(), self.sig_blocked())
+            || has_pending_signals(&self.sig_shared_pending().signal(), self.sig_blocked())
+        {
+            pcb.flags().insert(ProcessFlags::HAS_PENDING_SIGNAL);
+            return true;
+        }
+        /*
+         * We must never clear the flag in another thread, or in current
+         * when it's possible the current syscall is returning -ERESTART*.
+         * So we don't clear it here, and only callers who know they should do.
+         */
+        return false;
+    }
+}
+/// 参考 https://code.dragonos.org.cn/xref/linux-6.1.9/include/linux/sched/signal.h?fi=restore_saved_sigmask#547
+pub fn restore_saved_sigmask() {
+    if ProcessManager::current_pcb()
+        .flags()
+        .test_and_clear(ProcessFlags::RESTORE_SIG_MASK)
+    {
+        let saved = *ProcessManager::current_pcb()
+            .sig_info_irqsave()
+            .saved_sigmask();
+        __set_current_blocked(&saved);
+    }
+}
+
+/// 刷新指定进程的sighand的sigaction,将满足条件的sigaction恢复为默认状态。
+/// 除非某个信号被设置为忽略且 `force_default` 为 `false`,否则都不会将其恢复。
+///
+/// # 参数
 ///
-/// @param pcb 要被刷新的pcb
-/// @param force_default 是否强制将sigaction恢复成默认状态
+/// - `pcb`: 要被刷新的pcb。
+/// - `force_default`: 是否强制将sigaction恢复成默认状态。
 pub fn flush_signal_handlers(pcb: Arc<ProcessControlBlock>, force_default: bool) {
     compiler_fence(core::sync::atomic::Ordering::SeqCst);
     // debug!("hand=0x{:018x}", hand as *const sighand_struct as usize);
@@ -441,31 +500,144 @@ pub(super) fn do_sigaction(
     return Ok(());
 }
 
-/// 设置当前进程的屏蔽信号 (sig_block),待引入 [sigprocmask](https://man7.org/linux/man-pages/man2/sigprocmask.2.html) 系统调用后要删除这个散装函数
-///
-/// ## 参数
-///
-/// - `new_set` 新的屏蔽信号bitmap的值
-pub fn set_current_sig_blocked(new_set: &mut SigSet) {
-    let to_remove: SigSet =
-        <Signal as Into<SigSet>>::into(Signal::SIGKILL) | Signal::SIGSTOP.into();
-    new_set.remove(to_remove);
-    //TODO 把这个散装函数用 sigsetops 替换掉
-    let pcb = ProcessManager::current_pcb();
+/// https://code.dragonos.org.cn/xref/linux-6.6.21/include/uapi/asm-generic/signal-defs.h#72
+/// 对应SIG_BLOCK,SIG_UNBLOCK,SIG_SETMASK
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum SigHow {
+    Block = 0,
+    Unblock = 1,
+    SetMask = 2,
+}
+
+impl TryFrom<i32> for SigHow {
+    type Error = SystemError;
+    fn try_from(value: i32) -> Result<Self, Self::Error> {
+        match value {
+            0 => Ok(SigHow::Block),
+            1 => Ok(SigHow::Unblock),
+            2 => Ok(SigHow::SetMask),
+            _ => Err(SystemError::EINVAL),
+        }
+    }
+}
+
+fn __set_task_blocked(pcb: &Arc<ProcessControlBlock>, new_set: &SigSet) {
+    //todo 还有一个对线程组是否为空的判断,进程组、线程组实现之后,需要更改这里。
+    if pcb.has_pending_signal() {
+        let mut newblocked = *new_set;
+        let guard = pcb.sig_info_irqsave();
+        newblocked.remove(*guard.sig_blocked());
+        drop(guard);
+
+        // 从主线程开始去遍历
+        if let Some(group_leader) = pcb.threads_read_irqsave().group_leader() {
+            retarget_shared_pending(group_leader, newblocked);
+        }
+    }
+    *pcb.sig_info_mut().sig_block_mut() = *new_set;
+    pcb.recalc_sigpending(None);
+}
 
+fn __set_current_blocked(new_set: &SigSet) {
+    let pcb = ProcessManager::current_pcb();
     /*
         如果当前pcb的sig_blocked和新的相等,那么就不用改变它。
         请注意,一个进程的sig_blocked字段不能被其他进程修改!
     */
-    if pcb.sig_info_irqsave().sig_block().eq(new_set) {
+    if pcb.sig_info_irqsave().sig_blocked().eq(new_set) {
         return;
     }
+    let guard: SpinLockGuard<'_, SignalStruct> = pcb.sig_struct_irqsave();
 
-    let guard = pcb.sig_struct_irqsave();
-    // todo: 当一个进程有多个线程后,在这里需要设置每个线程的block字段,并且 retarget_shared_pending(虽然我还没搞明白linux这部分是干啥的)
+    __set_task_blocked(&pcb, new_set);
 
-    // 设置当前进程的sig blocked
-    *pcb.sig_info_mut().sig_block_mut() = *new_set;
-    recalc_sigpending();
     drop(guard);
 }
+
+fn retarget_shared_pending(pcb: Arc<ProcessControlBlock>, which: SigSet) {
+    let retarget = pcb.sig_info_irqsave().sig_shared_pending().signal();
+    retarget.intersects(which);
+    if retarget.is_empty() {
+        return;
+    }
+
+    // 对于线程组中的每一个线程都要执行的函数
+    let thread_handling_function = |pcb: Arc<ProcessControlBlock>, retarget: &SigSet| {
+        if retarget.is_empty() {
+            return;
+        }
+
+        if pcb.flags().contains(ProcessFlags::EXITING) {
+            return;
+        }
+
+        let blocked = pcb.sig_info_irqsave().sig_shared_pending().signal();
+        if retarget.difference(blocked).is_empty() {
+            return;
+        }
+
+        retarget.intersects(blocked);
+        if !pcb.has_pending_signal() {
+            let guard = pcb.sig_struct_irqsave();
+            signal_wake_up(pcb.clone(), guard, false);
+        }
+        // 之前的对retarget的判断移动到最前面,因为对于当前线程的线程的处理已经结束,对于后面的线程在一开始判断retarget为空即可结束处理
+
+        // debug!("handle done");
+    };
+
+    // 暴力遍历每一个线程,找到相同的tgid
+    let tgid = pcb.tgid();
+    for &pid in pcb.children_read_irqsave().iter() {
+        if let Some(child) = ProcessManager::find(pid) {
+            if child.tgid() == tgid {
+                thread_handling_function(child, &retarget);
+            }
+        }
+    }
+    // debug!("retarget_shared_pending done!");
+}
+
+/// 设置当前进程的屏蔽信号 (sig_block)
+///
+/// ## 参数
+///
+/// - `new_set` 新的屏蔽信号bitmap的值
+pub fn set_current_blocked(new_set: &mut SigSet) {
+    let to_remove: SigSet =
+        <Signal as Into<SigSet>>::into(Signal::SIGKILL) | Signal::SIGSTOP.into();
+    new_set.remove(to_remove);
+    __set_current_blocked(new_set);
+}
+
+/// 设置当前进程的屏蔽信号 (sig_block)
+///
+/// ## 参数
+///
+/// - `how` 设置方式
+/// - `new_set` 新的屏蔽信号bitmap的值
+pub fn set_sigprocmask(how: SigHow, set: SigSet) -> Result<SigSet, SystemError> {
+    let pcb: Arc<ProcessControlBlock> = ProcessManager::current_pcb();
+    let guard = pcb.sig_info_irqsave();
+    let oset = *guard.sig_blocked();
+
+    let mut res_set = oset;
+    drop(guard);
+
+    match how {
+        SigHow::Block => {
+            // debug!("SIG_BLOCK\tGoing to insert is: {}", set.bits());
+            res_set.insert(set);
+        }
+        SigHow::Unblock => {
+            res_set.remove(set);
+        }
+        SigHow::SetMask => {
+            // debug!("SIG_SETMASK\tGoing to set is: {}", set.bits());
+            res_set = set;
+        }
+    }
+
+    __set_current_blocked(&res_set);
+    Ok(oset)
+}

+ 11 - 5
kernel/src/ipc/signal_types.rs

@@ -75,9 +75,15 @@ pub struct InnerSignalStruct {
 impl SignalStruct {
     #[inline(never)]
     pub fn new() -> Self {
-        Self {
+        let mut r = Self {
             inner: Box::<InnerSignalStruct>::default(),
-        }
+        };
+        let sig_ign = Sigaction::default();
+        r.inner.handlers[Signal::SIGCHLD as usize - 1] = sig_ign;
+        r.inner.handlers[Signal::SIGURG as usize - 1] = sig_ign;
+        r.inner.handlers[Signal::SIGWINCH as usize - 1] = sig_ign;
+
+        r
     }
 }
 
@@ -447,8 +453,6 @@ impl SigPending {
             None
         };
 
-        // 当一个进程具有多个线程之后,在这里需要重新计算线程的flag中的TIF_SIGPENDING位
-        // recalc_sigpending();
         return (sig, info);
     }
     /// @brief 从sigpending中删除mask中被置位的信号。也就是说,比如mask的第1位被置为1,那么就从sigqueue中删除所有signum为2的信号的信息。
@@ -539,10 +543,12 @@ impl SigQueue {
 pub trait SignalArch {
     /// 信号处理函数
     ///
+    /// 处理信号或重启系统调用
+    ///
     /// ## 参数
     ///
     /// - `frame` 中断栈帧
-    unsafe fn do_signal(frame: &mut TrapFrame);
+    unsafe fn do_signal_or_restart(frame: &mut TrapFrame);
 
     fn sys_rt_sigreturn(trap_frame: &mut TrapFrame) -> u64;
 }

+ 73 - 25
kernel/src/ipc/syscall.rs

@@ -16,8 +16,7 @@ use crate::{
         FilePrivateData,
     },
     ipc::shm::{shm_manager_lock, IPC_PRIVATE},
-    libs::align::page_align_up,
-    libs::spinlock::SpinLock,
+    libs::{align::page_align_up, spinlock::SpinLock},
     mm::{
         allocator::page_frame::{PageFrameCount, PhysPageFrame, VirtPageFrame},
         page::{page_manager_lock_irqsave, EntryFlags, PageFlushAll},
@@ -35,6 +34,7 @@ use crate::{
 use super::{
     pipe::{LockedPipeInode, PipeFsPrivateData},
     shm::{ShmCtlCmd, ShmFlags, ShmId, ShmKey},
+    signal::{set_sigprocmask, SigHow},
     signal_types::{
         SaHandlerType, SigInfo, SigType, Sigaction, SigactionType, UserSigaction, USER_SIG_DFL,
         USER_SIG_ERR, USER_SIG_IGN,
@@ -404,6 +404,9 @@ impl Syscall {
         // 更新最后一次连接时间
         kernel_shm.update_atim();
 
+        // 映射计数增加
+        kernel_shm.increase_count();
+
         Ok(r)
     }
 
@@ -432,29 +435,6 @@ impl Syscall {
             return Err(SystemError::EINVAL);
         }
 
-        // 获取映射的物理地址
-        let paddr = address_write_guard
-            .user_mapper
-            .utable
-            .translate(vaddr)
-            .ok_or(SystemError::EINVAL)?
-            .0;
-
-        // 如果物理页的shm_id为None,代表不是共享页
-        let mut page_manager_guard = page_manager_lock_irqsave();
-        let page = page_manager_guard.get(&paddr).ok_or(SystemError::EINVAL)?;
-        let shm_id = page.read_irqsave().shm_id().ok_or(SystemError::EINVAL)?;
-        drop(page_manager_guard);
-
-        // 获取对应共享页管理信息
-        let mut shm_manager_guard = shm_manager_lock();
-        let kernel_shm = shm_manager_guard
-            .get_mut(&shm_id)
-            .ok_or(SystemError::EINVAL)?;
-        // 更新最后一次断开连接时间
-        kernel_shm.update_dtim();
-        drop(shm_manager_guard);
-
         // 取消映射
         let flusher: PageFlushAll<MMArch> = PageFlushAll::new();
         vma.unmap(&mut address_write_guard.user_mapper.utable, flusher);
@@ -504,4 +484,72 @@ impl Syscall {
             ShmCtlCmd::Default => Err(SystemError::EINVAL),
         }
     }
+
+    /// # SYS_SIGPROCMASK系统调用函数,用于设置或查询当前进程的信号屏蔽字
+    ///
+    /// ## 参数
+    ///
+    /// - `how`: 指示如何修改信号屏蔽字
+    /// - `nset`: 新的信号屏蔽字
+    /// - `oset`: 旧的信号屏蔽字的指针,由于可以是NULL,所以用Option包装
+    /// - `sigsetsize`: 信号集的大小
+    ///
+    /// ## 返回值
+    ///
+    /// 成功:0
+    /// 失败:错误码
+    ///
+    /// ## 说明
+    /// 根据 https://man7.org/linux/man-pages/man2/sigprocmask.2.html ,传进来的oldset和newset都是指针类型,这里选择传入usize然后转换为u64的指针类型
+    pub fn rt_sigprocmask(
+        how: i32,
+        newset: usize,
+        oldset: usize,
+        sigsetsize: usize,
+    ) -> Result<usize, SystemError> {
+        // 对应oset传进来一个NULL的情况
+        let oset = if oldset == 0 { None } else { Some(oldset) };
+        let nset = if newset == 0 { None } else { Some(newset) };
+
+        if sigsetsize != size_of::<SigSet>() {
+            return Err(SystemError::EFAULT);
+        }
+
+        let sighow = SigHow::try_from(how)?;
+
+        let mut new_set = SigSet::default();
+        if let Some(nset) = nset {
+            let reader = UserBufferReader::new(
+                VirtAddr::new(nset).as_ptr::<u64>(),
+                core::mem::size_of::<u64>(),
+                true,
+            )?;
+
+            let nset = reader.read_one_from_user::<u64>(0)?;
+            new_set = SigSet::from_bits_truncate(*nset);
+            // debug!("Get Newset: {}", &new_set.bits());
+            let to_remove: SigSet =
+                <Signal as Into<SigSet>>::into(Signal::SIGKILL) | Signal::SIGSTOP.into();
+            new_set.remove(to_remove);
+        }
+
+        let oldset_to_return = set_sigprocmask(sighow, new_set)?;
+        if let Some(oldset) = oset {
+            // debug!("Get Oldset to return: {}", &oldset_to_return.bits());
+            let mut writer = UserBufferWriter::new(
+                VirtAddr::new(oldset).as_ptr::<u64>(),
+                core::mem::size_of::<u64>(),
+                true,
+            )?;
+            writer.copy_one_to_user::<u64>(&oldset_to_return.bits(), 0)?;
+        }
+
+        Ok(0)
+    }
+
+    pub fn restart_syscall() -> Result<usize, SystemError> {
+        // todo: https://code.dragonos.org.cn/xref/linux-6.1.9/kernel/signal.c#2998
+        unimplemented!("restart_syscall with restart block");
+        // Err(SystemError::ENOSYS)
+    }
 }

+ 1 - 1
kernel/src/libs/semaphore.rs

@@ -37,7 +37,7 @@ impl Semaphore {
     fn down(&self) {
         if self.counter.fetch_sub(1, Ordering::Release) <= 0 {
             self.counter.fetch_add(1, Ordering::Relaxed);
-            self.wait_queue.sleep();
+            self.wait_queue.sleep().ok();
             //资源不充足,信号量<=0, 此时进程睡眠
         }
     }

+ 86 - 34
kernel/src/libs/wait_queue.rs

@@ -1,7 +1,7 @@
 // #![allow(dead_code)]
 use core::intrinsics::unlikely;
 
-use alloc::{collections::LinkedList, sync::Arc, vec::Vec};
+use alloc::{collections::VecDeque, sync::Arc, vec::Vec};
 use log::{error, warn};
 use system_error::SystemError;
 
@@ -19,23 +19,40 @@ use super::{
 
 #[derive(Debug)]
 struct InnerWaitQueue {
+    /// 等待队列是否已经死亡, 如果已经死亡, 则不能再添加新的等待进程
+    dead: bool,
     /// 等待队列的链表
-    wait_list: LinkedList<Arc<ProcessControlBlock>>,
+    wait_list: VecDeque<Arc<ProcessControlBlock>>,
 }
 
 /// 被自旋锁保护的等待队列
 #[derive(Debug)]
-pub struct WaitQueue(SpinLock<InnerWaitQueue>);
+pub struct WaitQueue {
+    inner: SpinLock<InnerWaitQueue>,
+}
 
 #[allow(dead_code)]
 impl WaitQueue {
     pub const fn default() -> Self {
-        WaitQueue(SpinLock::new(InnerWaitQueue::INIT))
+        WaitQueue {
+            inner: SpinLock::new(InnerWaitQueue::INIT),
+        }
+    }
+
+    fn inner_irqsave(&self) -> SpinLockGuard<InnerWaitQueue> {
+        self.inner.lock_irqsave()
+    }
+
+    fn inner(&self) -> SpinLockGuard<InnerWaitQueue> {
+        self.inner.lock()
     }
 
     pub fn prepare_to_wait_event(&self, interruptible: bool) -> Result<(), SystemError> {
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock_irqsave();
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
         let pcb = ProcessManager::current_pcb();
+        if !guard.can_sleep() {
+            return Err(SystemError::ESRCH);
+        }
         if Signal::signal_pending_state(interruptible, false, &pcb) {
             return Err(SystemError::ERESTARTSYS);
         } else {
@@ -51,7 +68,7 @@ impl WaitQueue {
     pub fn finish_wait(&self) {
         let pcb = ProcessManager::current_pcb();
         let mut writer = pcb.sched_info().inner_lock_write_irqsave();
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock_irqsave();
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
 
         writer.set_state(ProcessState::Runnable);
         writer.set_wakeup();
@@ -62,34 +79,49 @@ impl WaitQueue {
     }
 
     /// @brief 让当前进程在等待队列上进行等待,并且,允许被信号打断
-    pub fn sleep(&self) {
+    pub fn sleep(&self) -> Result<(), SystemError> {
         before_sleep_check(0);
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock_irqsave();
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
+        if !guard.can_sleep() {
+            return Err(SystemError::ESRCH);
+        }
         ProcessManager::mark_sleep(true).unwrap_or_else(|e| {
             panic!("sleep error: {:?}", e);
         });
         guard.wait_list.push_back(ProcessManager::current_pcb());
         drop(guard);
         schedule(SchedMode::SM_NONE);
+        Ok(())
+    }
+
+    /// 标记等待队列已经死亡,不能再添加新的等待进程
+    pub fn mark_dead(&self) {
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
+        guard.dead = true;
+        drop(guard);
     }
 
     /// @brief 让当前进程在等待队列上进行等待,并且,在释放waitqueue的锁之前,执行f函数闭包
-    pub fn sleep_with_func<F>(&self, f: F)
+    pub fn sleep_with_func<F>(&self, f: F) -> Result<(), SystemError>
     where
         F: FnOnce(),
     {
         before_sleep_check(0);
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock_irqsave();
-        let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
+        if !guard.can_sleep() {
+            return Err(SystemError::ESRCH);
+        }
+
         ProcessManager::mark_sleep(true).unwrap_or_else(|e| {
             panic!("sleep error: {:?}", e);
         });
-        drop(irq_guard);
         guard.wait_list.push_back(ProcessManager::current_pcb());
         f();
 
         drop(guard);
         schedule(SchedMode::SM_NONE);
+
+        Ok(())
     }
 
     /// @brief 让当前进程在等待队列上进行等待. 但是,在释放waitqueue的锁之后,不会调用调度函数。
@@ -106,80 +138,95 @@ impl WaitQueue {
     ///
     /// 由于sleep_without_schedule不会调用调度函数,因此,如果开发者忘记在执行本函数之后,手动调用调度函数,
     /// 由于时钟中断到来或者‘其他cpu kick了当前cpu’,可能会导致一些未定义的行为。
-    pub unsafe fn sleep_without_schedule(&self) {
+    pub unsafe fn sleep_without_schedule(&self) -> Result<(), SystemError> {
         before_sleep_check(1);
         // 安全检查:确保当前处于中断禁止状态
         assert!(!CurrentIrqArch::is_irq_enabled());
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock();
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
+        if !guard.can_sleep() {
+            return Err(SystemError::ESRCH);
+        }
         ProcessManager::mark_sleep(true).unwrap_or_else(|e| {
             panic!("sleep error: {:?}", e);
         });
         guard.wait_list.push_back(ProcessManager::current_pcb());
         drop(guard);
+        Ok(())
     }
 
-    pub unsafe fn sleep_without_schedule_uninterruptible(&self) {
+    pub unsafe fn sleep_without_schedule_uninterruptible(&self) -> Result<(), SystemError> {
         before_sleep_check(1);
         // 安全检查:确保当前处于中断禁止状态
         assert!(!CurrentIrqArch::is_irq_enabled());
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock();
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
+        if !guard.can_sleep() {
+            return Err(SystemError::ESRCH);
+        }
         ProcessManager::mark_sleep(false).unwrap_or_else(|e| {
             panic!("sleep error: {:?}", e);
         });
         guard.wait_list.push_back(ProcessManager::current_pcb());
         drop(guard);
+        Ok(())
     }
     /// @brief 让当前进程在等待队列上进行等待,并且,不允许被信号打断
-    pub fn sleep_uninterruptible(&self) {
+    pub fn sleep_uninterruptible(&self) -> Result<(), SystemError> {
         before_sleep_check(0);
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock();
-        let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
+        if !guard.can_sleep() {
+            return Err(SystemError::ESRCH);
+        }
         ProcessManager::mark_sleep(false).unwrap_or_else(|e| {
             panic!("sleep error: {:?}", e);
         });
-        drop(irq_guard);
         guard.wait_list.push_back(ProcessManager::current_pcb());
         drop(guard);
         schedule(SchedMode::SM_NONE);
+        Ok(())
     }
 
     /// @brief 让当前进程在等待队列上进行等待,并且,允许被信号打断。
     /// 在当前进程的pcb加入队列后,解锁指定的自旋锁。
-    pub fn sleep_unlock_spinlock<T>(&self, to_unlock: SpinLockGuard<T>) {
+    pub fn sleep_unlock_spinlock<T>(&self, to_unlock: SpinLockGuard<T>) -> Result<(), SystemError> {
         before_sleep_check(1);
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock();
-        let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
+        if !guard.can_sleep() {
+            return Err(SystemError::ESRCH);
+        }
         ProcessManager::mark_sleep(true).unwrap_or_else(|e| {
             panic!("sleep error: {:?}", e);
         });
-        drop(irq_guard);
         guard.wait_list.push_back(ProcessManager::current_pcb());
         drop(to_unlock);
         drop(guard);
         schedule(SchedMode::SM_NONE);
+        Ok(())
     }
 
     /// @brief 让当前进程在等待队列上进行等待,并且,允许被信号打断。
     /// 在当前进程的pcb加入队列后,解锁指定的Mutex。
-    pub fn sleep_unlock_mutex<T>(&self, to_unlock: MutexGuard<T>) {
+    pub fn sleep_unlock_mutex<T>(&self, to_unlock: MutexGuard<T>) -> Result<(), SystemError> {
         before_sleep_check(1);
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock();
-        let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
+
+        if !guard.can_sleep() {
+            return Err(SystemError::ESRCH);
+        }
         ProcessManager::mark_sleep(true).unwrap_or_else(|e| {
             panic!("sleep error: {:?}", e);
         });
-        drop(irq_guard);
         guard.wait_list.push_back(ProcessManager::current_pcb());
         drop(to_unlock);
         drop(guard);
         schedule(SchedMode::SM_NONE);
+        Ok(())
     }
 
     /// @brief 让当前进程在等待队列上进行等待,并且,不允许被信号打断。
     /// 在当前进程的pcb加入队列后,解锁指定的自旋锁。
     pub fn sleep_uninterruptible_unlock_spinlock<T>(&self, to_unlock: SpinLockGuard<T>) {
         before_sleep_check(1);
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock();
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
         let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
         ProcessManager::mark_sleep(false).unwrap_or_else(|e| {
             panic!("sleep error: {:?}", e);
@@ -195,7 +242,7 @@ impl WaitQueue {
     /// 在当前进程的pcb加入队列后,解锁指定的Mutex。
     pub fn sleep_uninterruptible_unlock_mutex<T>(&self, to_unlock: MutexGuard<T>) {
         before_sleep_check(1);
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock();
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
         let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
         ProcessManager::mark_sleep(false).unwrap_or_else(|e| {
             panic!("sleep error: {:?}", e);
@@ -217,7 +264,7 @@ impl WaitQueue {
     /// @return true 成功唤醒进程
     /// @return false 没有唤醒进程
     pub fn wakeup(&self, state: Option<ProcessState>) -> bool {
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock_irqsave();
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
         // 如果队列为空,则返回
         if guard.wait_list.is_empty() {
             return false;
@@ -246,7 +293,7 @@ impl WaitQueue {
     ///
     /// @param state 用于判断的state,如果一个进程与这个state相同,或者为None(表示不进行这个判断),则唤醒这个进程。
     pub fn wakeup_all(&self, state: Option<ProcessState>) {
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock_irqsave();
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
         // 如果队列为空,则返回
         if guard.wait_list.is_empty() {
             return;
@@ -281,14 +328,19 @@ impl WaitQueue {
 
     /// @brief 获得当前等待队列的大小
     pub fn len(&self) -> usize {
-        return self.0.lock().wait_list.len();
+        return self.inner_irqsave().wait_list.len();
     }
 }
 
 impl InnerWaitQueue {
     pub const INIT: InnerWaitQueue = InnerWaitQueue {
-        wait_list: LinkedList::new(),
+        wait_list: VecDeque::new(),
+        dead: false,
     };
+
+    pub fn can_sleep(&self) -> bool {
+        return !self.dead;
+    }
 }
 
 fn before_sleep_check(max_preempt: usize) {

+ 189 - 0
kernel/src/misc/events/kprobe/device.rs

@@ -0,0 +1,189 @@
+use crate::driver::base::class::Class;
+use crate::driver::base::device::bus::Bus;
+use crate::driver::base::device::driver::Driver;
+use crate::driver::base::device::{Device, DeviceCommonData, DeviceType, IdTable};
+use crate::driver::base::kobject::{
+    KObjType, KObject, KObjectCommonData, KObjectState, LockedKObjectState,
+};
+use crate::driver::base::kset::KSet;
+use crate::filesystem::kernfs::KernFSInode;
+use crate::filesystem::sysfs::{Attribute, SysFSOpsSupport};
+use crate::filesystem::vfs::syscall::ModeType;
+use crate::libs::rwlock::{RwLockReadGuard, RwLockWriteGuard};
+use crate::libs::spinlock::{SpinLock, SpinLockGuard};
+use alloc::string::{String, ToString};
+use alloc::sync::{Arc, Weak};
+use core::fmt::Debug;
+use system_error::SystemError;
+
+#[derive(Debug)]
+#[cast_to([sync] Device)]
+pub struct KprobeDevice {
+    inner: SpinLock<InnerKprobeDevice>,
+    kobj_state: LockedKObjectState,
+    name: String,
+}
+
+#[derive(Debug)]
+struct InnerKprobeDevice {
+    kobject_common: KObjectCommonData,
+    device_common: DeviceCommonData,
+}
+
+impl KprobeDevice {
+    pub fn new(parent: Option<Weak<dyn KObject>>) -> Arc<Self> {
+        let bus_device = Self {
+            inner: SpinLock::new(InnerKprobeDevice {
+                kobject_common: KObjectCommonData::default(),
+                device_common: DeviceCommonData::default(),
+            }),
+            kobj_state: LockedKObjectState::new(None),
+            name: "kprobe".to_string(),
+        };
+        bus_device.set_parent(parent);
+        return Arc::new(bus_device);
+    }
+
+    fn inner(&self) -> SpinLockGuard<InnerKprobeDevice> {
+        self.inner.lock()
+    }
+}
+
+impl KObject for KprobeDevice {
+    fn as_any_ref(&self) -> &dyn core::any::Any {
+        self
+    }
+
+    fn set_inode(&self, inode: Option<Arc<KernFSInode>>) {
+        self.inner().kobject_common.kern_inode = inode;
+    }
+
+    fn inode(&self) -> Option<Arc<KernFSInode>> {
+        self.inner().kobject_common.kern_inode.clone()
+    }
+
+    fn parent(&self) -> Option<Weak<dyn KObject>> {
+        self.inner().kobject_common.parent.clone()
+    }
+
+    fn set_parent(&self, parent: Option<Weak<dyn KObject>>) {
+        self.inner().kobject_common.parent = parent;
+    }
+
+    fn kset(&self) -> Option<Arc<KSet>> {
+        self.inner().kobject_common.kset.clone()
+    }
+
+    fn set_kset(&self, kset: Option<Arc<KSet>>) {
+        self.inner().kobject_common.kset = kset;
+    }
+
+    fn kobj_type(&self) -> Option<&'static dyn KObjType> {
+        self.inner().kobject_common.kobj_type
+    }
+
+    fn set_kobj_type(&self, ktype: Option<&'static dyn KObjType>) {
+        self.inner().kobject_common.kobj_type = ktype;
+    }
+
+    fn name(&self) -> String {
+        self.name.clone()
+    }
+
+    fn set_name(&self, _name: String) {}
+
+    fn kobj_state(&self) -> RwLockReadGuard<KObjectState> {
+        self.kobj_state.read()
+    }
+
+    fn kobj_state_mut(&self) -> RwLockWriteGuard<KObjectState> {
+        self.kobj_state.write()
+    }
+
+    fn set_kobj_state(&self, state: KObjectState) {
+        *self.kobj_state.write() = state;
+    }
+}
+
+impl Device for KprobeDevice {
+    #[inline]
+    #[allow(dead_code)]
+    fn dev_type(&self) -> DeviceType {
+        return DeviceType::Other;
+    }
+
+    #[inline]
+    fn id_table(&self) -> IdTable {
+        IdTable::new("kprobe".to_string(), None)
+    }
+
+    fn bus(&self) -> Option<Weak<dyn Bus>> {
+        self.inner().device_common.bus.clone()
+    }
+
+    fn set_bus(&self, bus: Option<Weak<dyn Bus>>) {
+        self.inner().device_common.bus = bus;
+    }
+
+    fn set_class(&self, class: Option<Weak<dyn Class>>) {
+        self.inner().device_common.class = class;
+    }
+
+    fn driver(&self) -> Option<Arc<dyn Driver>> {
+        self.inner().device_common.driver.clone()?.upgrade()
+    }
+
+    fn set_driver(&self, driver: Option<Weak<dyn Driver>>) {
+        self.inner().device_common.driver = driver;
+    }
+
+    #[inline]
+    fn is_dead(&self) -> bool {
+        false
+    }
+
+    fn can_match(&self) -> bool {
+        todo!()
+    }
+
+    fn set_can_match(&self, _can_match: bool) {
+        todo!()
+    }
+
+    fn state_synced(&self) -> bool {
+        todo!()
+    }
+
+    fn dev_parent(&self) -> Option<Weak<dyn Device>> {
+        self.inner().device_common.get_parent_weak_or_clear()
+    }
+
+    fn set_dev_parent(&self, dev_parent: Option<Weak<dyn Device>>) {
+        self.inner().device_common.parent = dev_parent;
+    }
+}
+
+#[derive(Debug)]
+pub struct KprobeAttr;
+
+impl Attribute for KprobeAttr {
+    fn name(&self) -> &str {
+        "type"
+    }
+
+    fn mode(&self) -> ModeType {
+        ModeType::S_IRUGO
+    }
+
+    fn support(&self) -> SysFSOpsSupport {
+        SysFSOpsSupport::ATTR_SHOW
+    }
+    fn show(&self, _kobj: Arc<dyn KObject>, buf: &mut [u8]) -> Result<usize, SystemError> {
+        if buf.is_empty() {
+            return Err(SystemError::EINVAL);
+        }
+        // perf_type_id::PERF_TYPE_MAX
+        buf[0] = b'6';
+        Ok(1)
+    }
+}

+ 31 - 0
kernel/src/misc/events/kprobe/mod.rs

@@ -0,0 +1,31 @@
+use crate::driver::base::device::bus::Bus;
+use crate::driver::base::device::{device_manager, device_register, sys_devices_kset, Device};
+use crate::driver::base::kobject::KObject;
+use crate::init::initcall::INITCALL_DEVICE;
+use crate::misc::events::get_event_source_bus;
+use crate::misc::events::kprobe::device::{KprobeAttr, KprobeDevice};
+use alloc::sync::Arc;
+use system_error::SystemError;
+use unified_init::macros::unified_init;
+
+pub mod device;
+static mut KPROBE_DEVICE: Option<Arc<KprobeDevice>> = None;
+
+#[unified_init(INITCALL_DEVICE)]
+pub fn kprobe_subsys_init() -> Result<(), SystemError> {
+    let kprobe_device = KprobeDevice::new(Some(Arc::downgrade(
+        &(sys_devices_kset() as Arc<dyn KObject>),
+    )));
+
+    let event_source_bus = get_event_source_bus().ok_or(SystemError::EINVAL)?;
+    kprobe_device.set_bus(Some(Arc::downgrade(&(event_source_bus as Arc<dyn Bus>))));
+
+    // 注册到/sys/devices下
+    device_register(kprobe_device.clone())?;
+    unsafe {
+        KPROBE_DEVICE = Some(kprobe_device.clone());
+    }
+
+    device_manager().create_file(&(kprobe_device as Arc<dyn Device>), &KprobeAttr)?;
+    Ok(())
+}

+ 28 - 0
kernel/src/misc/events/mod.rs

@@ -0,0 +1,28 @@
+use crate::driver::base::device::bus::{bus_register, Bus};
+use crate::init::initcall::INITCALL_SUBSYS;
+use crate::misc::events::subsys::EventSourceBus;
+use alloc::sync::Arc;
+use system_error::SystemError;
+use unified_init::macros::unified_init;
+
+mod kprobe;
+mod subsys;
+
+static mut EVENT_SOURCE_BUS: Option<Arc<EventSourceBus>> = None;
+
+fn get_event_source_bus() -> Option<Arc<EventSourceBus>> {
+    unsafe { EVENT_SOURCE_BUS.clone() }
+}
+
+#[unified_init(INITCALL_SUBSYS)]
+pub fn init_event_source_bus() -> Result<(), SystemError> {
+    let event_source_bus = EventSourceBus::new();
+    let r = bus_register(event_source_bus.clone() as Arc<dyn Bus>);
+    if r.is_err() {
+        unsafe { EVENT_SOURCE_BUS = None };
+        return r;
+    }
+    unsafe { EVENT_SOURCE_BUS = Some(event_source_bus.clone()) };
+    // kprobe::kprobe_subsys_init()?;
+    Ok(())
+}

+ 52 - 0
kernel/src/misc/events/subsys.rs

@@ -0,0 +1,52 @@
+use crate::driver::base::device::bus::Bus;
+use crate::driver::base::device::Device;
+use crate::driver::base::subsys::SubSysPrivate;
+use alloc::string::{String, ToString};
+use alloc::sync::{Arc, Weak};
+use system_error::SystemError;
+
+#[derive(Debug)]
+pub struct EventSourceBus {
+    private: SubSysPrivate,
+}
+
+impl EventSourceBus {
+    pub fn new() -> Arc<Self> {
+        let w: Weak<Self> = Weak::new();
+        let private = SubSysPrivate::new("event_source".to_string(), Some(w), None, &[]);
+        let bus = Arc::new(Self { private });
+        bus.subsystem()
+            .set_bus(Some(Arc::downgrade(&(bus.clone() as Arc<dyn Bus>))));
+        return bus;
+    }
+}
+
+impl Bus for EventSourceBus {
+    fn name(&self) -> String {
+        "event_source".to_string()
+    }
+
+    fn dev_name(&self) -> String {
+        self.name()
+    }
+
+    fn root_device(&self) -> Option<Weak<dyn Device>> {
+        None
+    }
+
+    fn remove(&self, _device: &Arc<dyn Device>) -> Result<(), SystemError> {
+        todo!()
+    }
+
+    fn shutdown(&self, _device: &Arc<dyn Device>) {
+        todo!()
+    }
+
+    fn resume(&self, _device: &Arc<dyn Device>) -> Result<(), SystemError> {
+        todo!()
+    }
+
+    fn subsystem(&self) -> &SubSysPrivate {
+        &self.private
+    }
+}

+ 1 - 0
kernel/src/misc/mod.rs

@@ -1 +1,2 @@
+pub mod events;
 pub mod ksysfs;

+ 4 - 26
kernel/src/mm/allocator/page_frame.rs

@@ -5,8 +5,6 @@ use core::{
 
 use crate::{
     arch::{mm::LockedFrameAllocator, MMArch},
-    ipc::shm::shm_manager_lock,
-    libs::spinlock::SpinLockGuard,
     mm::{MemoryManagementArch, PhysAddr, VirtAddr},
 };
 
@@ -173,6 +171,8 @@ impl Iterator for VirtPageFrameIter {
 pub struct PageFrameCount(usize);
 
 impl PageFrameCount {
+    pub const ONE: PageFrameCount = PageFrameCount(1);
+
     // @brief 初始化PageFrameCount
     pub const fn new(count: usize) -> Self {
         return Self(count);
@@ -355,30 +355,8 @@ pub unsafe fn allocate_page_frames(count: PageFrameCount) -> Option<(PhysAddr, P
 ///
 /// @param frame 要释放的第一个页帧
 /// @param count 要释放的页帧数量 (必须是2的n次幂)
-pub unsafe fn deallocate_page_frames(
-    frame: PhysPageFrame,
-    count: PageFrameCount,
-    page_manager_guard: &mut SpinLockGuard<'_, crate::mm::page::PageManager>,
-) {
+pub unsafe fn deallocate_page_frames(frame: PhysPageFrame, count: PageFrameCount) {
     unsafe {
         LockedFrameAllocator.free(frame.phys_address(), count);
-    }
-
-    let mut frame = frame;
-    for _ in 0..count.data() {
-        let paddr = frame.phys_address();
-        let page = page_manager_guard.get(&paddr);
-
-        if let Some(page) = page {
-            // 如果page是共享页,将其共享页信息从SHM_MANAGER中删去
-            let page_guard = page.read_irqsave();
-            if page_guard.shared() {
-                shm_manager_lock().free_id(&page_guard.shm_id().unwrap());
-            }
-        }
-
-        // 将已回收的物理页面对应的Page从PAGE_MANAGER中删去
-        page_manager_guard.remove_page(&paddr);
-        frame = frame.next();
-    }
+    };
 }

+ 23 - 38
kernel/src/mm/fault.rs

@@ -22,7 +22,7 @@ use crate::mm::MemoryManagementArch;
 
 use super::{
     allocator::page_frame::FrameAllocator,
-    page::{page_reclaimer_lock_irqsave, Page, PageFlags},
+    page::{FileMapInfo, Page, PageFlags, PageType},
 };
 
 bitflags! {
@@ -55,7 +55,7 @@ pub struct PageFaultMessage<'a> {
     flags: FaultFlags,
     /// 页表映射器
     mapper: &'a mut PageMapper,
-    /// 缺页的文件页在文件中的偏移
+    /// 缺页的文件页在文件中的偏移页号
     file_pgoff: Option<usize>,
     /// 缺页对应PageCache中的文件页
     page: Option<Arc<Page>>,
@@ -308,32 +308,14 @@ impl PageFaultHandler {
         let cache_page = pfm.page.clone().unwrap();
         let mapper = &mut pfm.mapper;
 
-        let cow_page_phys = mapper.allocator_mut().allocate_one();
-        if cow_page_phys.is_none() {
+        let mut page_manager_guard = page_manager_lock_irqsave();
+        if let Ok(page) =
+            page_manager_guard.copy_page(&cache_page.phys_address(), mapper.allocator_mut())
+        {
+            pfm.cow_page = Some(page.clone());
+        } else {
             return VmFaultReason::VM_FAULT_OOM;
         }
-        let cow_page_phys = cow_page_phys.unwrap();
-
-        let cow_page = Arc::new(Page::new(false, cow_page_phys));
-        pfm.cow_page = Some(cow_page.clone());
-
-        //复制PageCache内容到新的页内
-        let new_frame = MMArch::phys_2_virt(cow_page_phys).unwrap();
-        (new_frame.data() as *mut u8).copy_from_nonoverlapping(
-            MMArch::phys_2_virt(cache_page.read_irqsave().phys_address())
-                .unwrap()
-                .data() as *mut u8,
-            MMArch::PAGE_SIZE,
-        );
-
-        let mut page_manager_guard = page_manager_lock_irqsave();
-
-        // 新页加入页管理器中
-        page_manager_guard.insert(cow_page_phys, &cow_page);
-        cow_page.write_irqsave().set_page_cache_index(
-            cache_page.read_irqsave().page_cache(),
-            cache_page.read_irqsave().index(),
-        );
 
         ret = ret.union(Self::finish_fault(pfm));
 
@@ -608,10 +590,10 @@ impl PageFaultHandler {
                 << MMArch::PAGE_SHIFT);
 
         for pgoff in start_pgoff..=end_pgoff {
-            if let Some(page) = page_cache.get_page(pgoff) {
+            if let Some(page) = page_cache.lock_irqsave().get_page(pgoff) {
                 let page_guard = page.read_irqsave();
                 if page_guard.flags().contains(PageFlags::PG_UPTODATE) {
-                    let phys = page_guard.phys_address();
+                    let phys = page.phys_address();
 
                     let address =
                         VirtAddr::new(addr.data() + ((pgoff - start_pgoff) << MMArch::PAGE_SHIFT));
@@ -642,7 +624,7 @@ impl PageFaultHandler {
         let mapper = &mut pfm.mapper;
         let mut ret = VmFaultReason::empty();
 
-        if let Some(page) = page_cache.get_page(file_pgoff) {
+        if let Some(page) = page_cache.lock_irqsave().get_page(file_pgoff) {
             // TODO 异步从磁盘中预读页面进PageCache
 
             // 直接将PageCache中的页面作为要映射的页面
@@ -669,16 +651,19 @@ impl PageFaultHandler {
             )
             .expect("failed to read file to create pagecache page");
 
-            let page = Arc::new(Page::new(true, new_cache_page));
+            let page = page_manager_lock_irqsave()
+                .create_one_page(
+                    PageType::File(FileMapInfo {
+                        page_cache: page_cache.clone(),
+                        index: file_pgoff,
+                    }),
+                    PageFlags::PG_LRU,
+                    allocator,
+                )
+                .expect("failed to create page");
             pfm.page = Some(page.clone());
 
-            page.write_irqsave().add_flags(PageFlags::PG_LRU);
-            page_manager_lock_irqsave().insert(new_cache_page, &page);
-            page_reclaimer_lock_irqsave().insert_page(new_cache_page, &page);
-            page_cache.add_page(file_pgoff, &page);
-
-            page.write_irqsave()
-                .set_page_cache_index(Some(page_cache), Some(file_pgoff));
+            page_cache.lock_irqsave().add_page(file_pgoff, &page);
         }
         ret
     }
@@ -710,7 +695,7 @@ impl PageFaultHandler {
             cache_page.expect("no cache_page in PageFaultMessage")
         };
 
-        let page_phys = page_to_map.read_irqsave().phys_address();
+        let page_phys = page_to_map.phys_address();
 
         mapper.map_phys(address, page_phys, vma_guard.flags());
         page_to_map.write_irqsave().insert_vma(pfm.vma());

+ 342 - 144
kernel/src/mm/page.rs

@@ -1,4 +1,4 @@
-use alloc::string::ToString;
+use alloc::{string::ToString, vec::Vec};
 use core::{
     fmt::{self, Debug, Error, Formatter},
     marker::PhantomData,
@@ -17,7 +17,7 @@ use lru::LruCache;
 use crate::{
     arch::{interrupt::ipi::send_ipi, mm::LockedFrameAllocator, MMArch},
     exception::ipi::{IpiKind, IpiTarget},
-    filesystem::vfs::{file::PageCache, FilePrivateData},
+    filesystem::{page_cache::PageCache, vfs::FilePrivateData},
     init::initcall::INITCALL_CORE,
     ipc::shm::ShmId,
     libs::{
@@ -29,7 +29,9 @@ use crate::{
 };
 
 use super::{
-    allocator::page_frame::{FrameAllocator, PageFrameCount},
+    allocator::page_frame::{
+        deallocate_page_frames, FrameAllocator, PageFrameCount, PhysPageFrame,
+    },
     syscall::ProtFlags,
     ucontext::LockedVMA,
     MemoryManagementArch, PageTableKind, PhysAddr, VirtAddr,
@@ -74,6 +76,7 @@ impl PageManager {
         }
     }
 
+    #[allow(dead_code)]
     pub fn contains(&self, paddr: &PhysAddr) -> bool {
         self.phys2page.contains_key(paddr)
     }
@@ -91,13 +94,121 @@ impl PageManager {
             .clone()
     }
 
-    pub fn insert(&mut self, paddr: PhysAddr, page: &Arc<Page>) {
-        self.phys2page.insert(paddr, page.clone());
+    fn insert(&mut self, page: &Arc<Page>) -> Result<Arc<Page>, SystemError> {
+        let phys = page.phys_address();
+        if !self.phys2page.contains_key(&phys) {
+            self.phys2page.insert(phys, page.clone());
+            Ok(page.clone())
+        } else {
+            log::error!("phys page: {phys:?} already exists.");
+            Err(SystemError::EINVAL)
+        }
     }
 
     pub fn remove_page(&mut self, paddr: &PhysAddr) {
         self.phys2page.remove(paddr);
     }
+
+    /// # 创建一个新页面并加入管理器
+    ///
+    /// ## 参数
+    ///
+    /// - `shared`: 是否共享
+    /// - `page_type`: 页面类型
+    /// - `flags`: 页面标志
+    /// - `allocator`: 物理页帧分配器
+    ///
+    /// ## 返回值
+    ///
+    /// - `Ok(Arc<Page>)`: 新页面
+    /// - `Err(SystemError)`: 错误码
+    pub fn create_one_page(
+        &mut self,
+        page_type: PageType,
+        flags: PageFlags,
+        allocator: &mut dyn FrameAllocator,
+    ) -> Result<Arc<Page>, SystemError> {
+        self.create_pages(page_type, flags, allocator, PageFrameCount::ONE)?
+            .1
+            .first()
+            .ok_or(SystemError::ENOMEM)
+            .cloned()
+    }
+
+    /// # 创建新页面并加入管理器
+    ///
+    /// ## 参数
+    ///
+    /// - `shared`: 是否共享
+    /// - `page_type`: 页面类型
+    /// - `flags`: 页面标志
+    /// - `allocator`: 物理页帧分配器
+    /// - `count`: 页面数量
+    ///
+    /// ## 返回值
+    ///
+    /// - `Ok((PhysAddr, Vec<Arc<Page>>))`: 页面起始物理地址,新页面集合
+    /// - `Err(SystemError)`: 错误码
+    pub fn create_pages(
+        &mut self,
+        page_type: PageType,
+        flags: PageFlags,
+        allocator: &mut dyn FrameAllocator,
+        count: PageFrameCount,
+    ) -> Result<(PhysAddr, Vec<Arc<Page>>), SystemError> {
+        compiler_fence(Ordering::SeqCst);
+        let (start_paddr, count) = unsafe { allocator.allocate(count).ok_or(SystemError::ENOMEM)? };
+        compiler_fence(Ordering::SeqCst);
+
+        unsafe {
+            let vaddr = MMArch::phys_2_virt(start_paddr).unwrap();
+            MMArch::write_bytes(vaddr, 0, MMArch::PAGE_SIZE * count.data());
+        }
+
+        let mut cur_phys = PhysPageFrame::new(start_paddr);
+        let mut ret: Vec<Arc<Page>> = Vec::new();
+        for _ in 0..count.data() {
+            let page = Page::new(cur_phys.phys_address(), page_type.clone(), flags);
+            if let Err(e) = self.insert(&page) {
+                for insert_page in ret {
+                    self.remove_page(&insert_page.read_irqsave().phys_addr);
+                }
+                return Err(e);
+            }
+            ret.push(page);
+            cur_phys = cur_phys.next();
+        }
+        Ok((start_paddr, ret))
+    }
+
+    /// # 拷贝管理器中原有页面并加入管理器,同时拷贝原页面内容
+    ///
+    /// ## 参数
+    ///
+    /// - `old_phys`: 原页面的物理地址
+    /// - `allocator`: 物理页帧分配器
+    ///
+    /// ## 返回值
+    ///
+    /// - `Ok(Arc<Page>)`: 新页面
+    /// - `Err(SystemError)`: 错误码
+    pub fn copy_page(
+        &mut self,
+        old_phys: &PhysAddr,
+        allocator: &mut dyn FrameAllocator,
+    ) -> Result<Arc<Page>, SystemError> {
+        let old_page = self.get(old_phys).ok_or(SystemError::EINVAL)?;
+        let paddr = unsafe { allocator.allocate_one().ok_or(SystemError::ENOMEM)? };
+
+        assert!(!self.contains(&paddr), "phys page: {paddr:?} already exist");
+
+        let page = Page::copy(old_page.read_irqsave(), paddr)
+            .inspect_err(|_| unsafe { allocator.free_one(paddr) })?;
+
+        self.insert(&page)?;
+
+        Ok(page)
+    }
 }
 
 pub static mut PAGE_RECLAIMER: Option<SpinLock<PageReclaimer>> = None;
@@ -150,7 +261,7 @@ fn page_reclaim_thread() -> i32 {
             page_reclaimer_lock_irqsave().flush_dirty_pages();
             // 休眠5秒
             // log::info!("sleep");
-            let _ = nanosleep(PosixTimeSpec::new(5, 0));
+            let _ = nanosleep(PosixTimeSpec::new(0, 500_000_000));
         }
     }
 }
@@ -180,28 +291,31 @@ impl PageReclaimer {
         self.lru.put(paddr, page.clone());
     }
 
+    pub fn remove_page(&mut self, paddr: &PhysAddr) -> Option<Arc<Page>> {
+        self.lru.pop(paddr)
+    }
+
     /// lru链表缩减
     /// ## 参数
     ///
     /// - `count`: 需要缩减的页面数量
     pub fn shrink_list(&mut self, count: PageFrameCount) {
         for _ in 0..count.data() {
-            let (paddr, page) = self.lru.pop_lru().expect("pagecache is empty");
-            let page_cache = page.read_irqsave().page_cache().unwrap();
-            for vma in page.read_irqsave().anon_vma() {
-                let address_space = vma.lock_irqsave().address_space().unwrap();
-                let address_space = address_space.upgrade().unwrap();
-                let mut guard = address_space.write();
-                let mapper = &mut guard.user_mapper.utable;
-                let virt = vma.lock_irqsave().page_address(&page).unwrap();
-                unsafe {
-                    mapper.unmap(virt, false).unwrap().flush();
+            let (_, page) = self.lru.pop_lru().expect("pagecache is empty");
+            let mut guard = page.write_irqsave();
+            if let PageType::File(info) = guard.page_type().clone() {
+                let page_cache = &info.page_cache;
+                let page_index = info.index;
+                let paddr = guard.phys_address();
+                if guard.flags().contains(PageFlags::PG_DIRTY) {
+                    // 先回写脏页
+                    Self::page_writeback(&mut guard, true);
                 }
-            }
-            page_cache.remove_page(page.read_irqsave().index().unwrap());
-            page_manager_lock_irqsave().remove_page(&paddr);
-            if page.read_irqsave().flags.contains(PageFlags::PG_DIRTY) {
-                Self::page_writeback(&page, true);
+
+                // 删除页面
+                page_cache.lock_irqsave().remove_page(page_index);
+                page_manager_lock_irqsave().remove_page(&paddr);
+                self.remove_page(&paddr);
             }
         }
     }
@@ -215,24 +329,33 @@ impl PageReclaimer {
     /// 脏页回写函数
     /// ## 参数
     ///
-    /// - `page`: 需要回写的脏页
+    /// - `guard`: 需要回写的脏页
     /// - `unmap`: 是否取消映射
     ///
     /// ## 返回值
     /// - VmFaultReason: 页面错误处理信息标志
-    pub fn page_writeback(page: &Arc<Page>, unmap: bool) {
-        if !unmap {
-            page.write_irqsave().remove_flags(PageFlags::PG_DIRTY);
-        }
+    pub fn page_writeback(guard: &mut RwLockWriteGuard<InnerPage>, unmap: bool) {
+        // log::debug!("page writeback: {:?}", guard.phys_addr);
+
+        let (page_cache, page_index) = match guard.page_type() {
+            PageType::File(info) => (info.page_cache.clone(), info.index),
+            _ => {
+                log::warn!("try to writeback a non-file page");
+                return;
+            }
+        };
+        let paddr = guard.phys_address();
+        let inode = page_cache.inode().clone().unwrap().upgrade().unwrap();
 
-        for vma in page.read_irqsave().anon_vma() {
+        for vma in guard.vma_set() {
             let address_space = vma.lock_irqsave().address_space().unwrap();
             let address_space = address_space.upgrade().unwrap();
             let mut guard = address_space.write();
             let mapper = &mut guard.user_mapper.utable;
-            let virt = vma.lock_irqsave().page_address(page).unwrap();
+            let virt = vma.lock_irqsave().page_address(page_index).unwrap();
             if unmap {
                 unsafe {
+                    // 取消页表映射
                     mapper.unmap(virt, false).unwrap().flush();
                 }
             } else {
@@ -245,40 +368,44 @@ impl PageReclaimer {
                 };
             }
         }
-        let inode = page
-            .read_irqsave()
-            .page_cache
-            .clone()
-            .unwrap()
-            .inode()
-            .clone()
-            .unwrap()
-            .upgrade()
-            .unwrap();
+
+        let len = if let Ok(metadata) = inode.metadata() {
+            let size = metadata.size as usize;
+            if size < page_index * MMArch::PAGE_SIZE {
+                0
+            } else {
+                size - page_index * MMArch::PAGE_SIZE
+            }
+        } else {
+            MMArch::PAGE_SIZE
+        };
+
         inode
-            .write_at(
-                page.read_irqsave().index().unwrap(),
-                MMArch::PAGE_SIZE,
+            .write_direct(
+                page_index * MMArch::PAGE_SIZE,
+                len,
                 unsafe {
                     core::slice::from_raw_parts(
-                        MMArch::phys_2_virt(page.read_irqsave().phys_addr)
-                            .unwrap()
-                            .data() as *mut u8,
-                        MMArch::PAGE_SIZE,
+                        MMArch::phys_2_virt(paddr).unwrap().data() as *mut u8,
+                        len,
                     )
                 },
                 SpinLock::new(FilePrivateData::Unused).lock(),
             )
             .unwrap();
+
+        // 清除标记
+        guard.remove_flags(PageFlags::PG_DIRTY);
     }
 
     /// lru脏页刷新
-    pub fn flush_dirty_pages(&self) {
+    pub fn flush_dirty_pages(&mut self) {
         // log::info!("flush_dirty_pages");
         let iter = self.lru.iter();
-        for (_, page) in iter {
-            if page.read_irqsave().flags().contains(PageFlags::PG_DIRTY) {
-                Self::page_writeback(page, false);
+        for (_paddr, page) in iter {
+            let mut guard = page.write_irqsave();
+            if guard.flags().contains(PageFlags::PG_DIRTY) {
+                Self::page_writeback(&mut guard, false);
             }
         }
     }
@@ -302,20 +429,76 @@ bitflags! {
         const PG_PRIVATE = 1 << 15;
         const PG_RECLAIM = 1 << 18;
         const PG_SWAPBACKED = 1 << 19;
+        const PG_UNEVICTABLE = 1 << 20;
     }
 }
 
 #[derive(Debug)]
 pub struct Page {
     inner: RwLock<InnerPage>,
+    /// 页面所在物理地址
+    phys_addr: PhysAddr,
 }
 
 impl Page {
-    pub fn new(shared: bool, phys_addr: PhysAddr) -> Self {
-        let inner = InnerPage::new(shared, phys_addr);
-        Self {
+    /// # 创建新页面
+    ///
+    /// ## 参数
+    ///
+    /// - `shared`: 是否共享
+    /// - `phys_addr`: 物理地址
+    /// - `page_type`: 页面类型
+    /// - `flags`: 页面标志
+    ///
+    /// ## 返回值
+    ///
+    /// - `Arc<Page>`: 新页面
+    fn new(phys_addr: PhysAddr, page_type: PageType, flags: PageFlags) -> Arc<Page> {
+        let inner = InnerPage::new(phys_addr, page_type, flags);
+        let page = Arc::new(Self {
             inner: RwLock::new(inner),
+            phys_addr,
+        });
+        if page.read_irqsave().flags == PageFlags::PG_LRU {
+            page_reclaimer_lock_irqsave().insert_page(phys_addr, &page);
+        };
+        page
+    }
+
+    /// # 拷贝页面及内容
+    ///
+    /// ## 参数
+    ///
+    /// - `old_guard`: 源页面的读守卫
+    /// - `new_phys`: 新页面的物理地址
+    ///
+    /// ## 返回值
+    ///
+    /// - `Ok(Arc<Page>)`: 新页面
+    /// - `Err(SystemError)`: 错误码
+    fn copy(
+        old_guard: RwLockReadGuard<InnerPage>,
+        new_phys: PhysAddr,
+    ) -> Result<Arc<Page>, SystemError> {
+        let page_type = old_guard.page_type().clone();
+        let flags = *old_guard.flags();
+        let inner = InnerPage::new(new_phys, page_type, flags);
+        unsafe {
+            let old_vaddr =
+                MMArch::phys_2_virt(old_guard.phys_address()).ok_or(SystemError::EFAULT)?;
+            let new_vaddr = MMArch::phys_2_virt(new_phys).ok_or(SystemError::EFAULT)?;
+            (new_vaddr.data() as *mut u8)
+                .copy_from_nonoverlapping(old_vaddr.data() as *mut u8, MMArch::PAGE_SIZE);
         }
+        Ok(Arc::new(Self {
+            inner: RwLock::new(inner),
+            phys_addr: new_phys,
+        }))
+    }
+
+    #[inline(always)]
+    pub fn phys_address(&self) -> PhysAddr {
+        self.phys_addr
     }
 
     pub fn read_irqsave(&self) -> RwLockReadGuard<InnerPage> {
@@ -330,107 +513,68 @@ impl Page {
 #[derive(Debug)]
 /// 物理页面信息
 pub struct InnerPage {
-    /// 映射计数
-    map_count: usize,
-    /// 是否为共享页
-    shared: bool,
-    /// 映射计数为0时,是否可回收
-    free_when_zero: bool,
-    /// 共享页id(如果是共享页)
-    shm_id: Option<ShmId>,
     /// 映射到当前page的VMA
-    anon_vma: HashSet<Arc<LockedVMA>>,
+    vma_set: HashSet<Arc<LockedVMA>>,
     /// 标志
     flags: PageFlags,
-    /// 页所在的物理页帧号
+    /// 页面所在物理地址
     phys_addr: PhysAddr,
-    /// 在pagecache中的偏移
-    index: Option<usize>,
-    page_cache: Option<Arc<PageCache>>,
+    /// 页面类型
+    page_type: PageType,
 }
 
 impl InnerPage {
-    pub fn new(shared: bool, phys_addr: PhysAddr) -> Self {
-        let dealloc_when_zero = !shared;
+    pub fn new(phys_addr: PhysAddr, page_type: PageType, flags: PageFlags) -> Self {
         Self {
-            map_count: 0,
-            shared,
-            free_when_zero: dealloc_when_zero,
-            shm_id: None,
-            anon_vma: HashSet::new(),
-            flags: PageFlags::empty(),
+            vma_set: HashSet::new(),
+            flags,
             phys_addr,
-            index: None,
-            page_cache: None,
+            page_type,
         }
     }
 
     /// 将vma加入vma_set
     pub fn insert_vma(&mut self, vma: Arc<LockedVMA>) {
-        self.anon_vma.insert(vma);
-        self.map_count += 1;
+        self.vma_set.insert(vma);
     }
 
     /// 将vma从vma_set中删去
     pub fn remove_vma(&mut self, vma: &LockedVMA) {
-        self.anon_vma.remove(vma);
-        self.map_count -= 1;
+        self.vma_set.remove(vma);
     }
 
     /// 判断当前物理页是否能被回收
     pub fn can_deallocate(&self) -> bool {
-        self.map_count == 0 && self.free_when_zero
+        self.map_count() == 0 && !self.flags.contains(PageFlags::PG_UNEVICTABLE)
     }
 
     pub fn shared(&self) -> bool {
-        self.shared
-    }
-
-    pub fn shm_id(&self) -> Option<ShmId> {
-        self.shm_id
-    }
-
-    pub fn index(&self) -> Option<usize> {
-        self.index
+        self.map_count() > 1
     }
 
     pub fn page_cache(&self) -> Option<Arc<PageCache>> {
-        self.page_cache.clone()
-    }
-
-    pub fn set_page_cache(&mut self, page_cache: Option<Arc<PageCache>>) {
-        self.page_cache = page_cache;
-    }
-
-    pub fn set_index(&mut self, index: Option<usize>) {
-        self.index = index;
-    }
-
-    pub fn set_page_cache_index(
-        &mut self,
-        page_cache: Option<Arc<PageCache>>,
-        index: Option<usize>,
-    ) {
-        self.page_cache = page_cache;
-        self.index = index;
+        match &self.page_type {
+            PageType::File(info) => Some(info.page_cache.clone()),
+            _ => None,
+        }
     }
 
-    pub fn set_shm_id(&mut self, shm_id: ShmId) {
-        self.shm_id = Some(shm_id);
+    pub fn page_type(&self) -> &PageType {
+        &self.page_type
     }
 
-    pub fn set_dealloc_when_zero(&mut self, dealloc_when_zero: bool) {
-        self.free_when_zero = dealloc_when_zero;
+    pub fn set_page_type(&mut self, page_type: PageType) {
+        self.page_type = page_type;
     }
 
     #[inline(always)]
-    pub fn anon_vma(&self) -> &HashSet<Arc<LockedVMA>> {
-        &self.anon_vma
+    pub fn vma_set(&self) -> &HashSet<Arc<LockedVMA>> {
+        &self.vma_set
     }
 
     #[inline(always)]
     pub fn map_count(&self) -> usize {
-        self.map_count
+        self.vma_set.len()
     }
 
     #[inline(always)]
@@ -454,9 +598,83 @@ impl InnerPage {
     }
 
     #[inline(always)]
-    pub fn phys_address(&self) -> PhysAddr {
+    fn phys_address(&self) -> PhysAddr {
         self.phys_addr
     }
+
+    pub unsafe fn as_slice(&self) -> &[u8] {
+        core::slice::from_raw_parts(
+            MMArch::phys_2_virt(self.phys_addr).unwrap().data() as *const u8,
+            MMArch::PAGE_SIZE,
+        )
+    }
+
+    pub unsafe fn as_slice_mut(&mut self) -> &mut [u8] {
+        core::slice::from_raw_parts_mut(
+            MMArch::phys_2_virt(self.phys_addr).unwrap().data() as *mut u8,
+            MMArch::PAGE_SIZE,
+        )
+    }
+
+    pub unsafe fn copy_from_slice(&mut self, slice: &[u8]) {
+        assert_eq!(
+            slice.len(),
+            MMArch::PAGE_SIZE,
+            "length of slice not match PAGE_SIZE"
+        );
+        core::slice::from_raw_parts_mut(
+            MMArch::phys_2_virt(self.phys_addr).unwrap().data() as *mut u8,
+            MMArch::PAGE_SIZE,
+        )
+        .copy_from_slice(slice);
+    }
+
+    pub unsafe fn truncate(&mut self, len: usize) {
+        if len > MMArch::PAGE_SIZE {
+            return;
+        }
+
+        let vaddr = unsafe { MMArch::phys_2_virt(self.phys_addr).unwrap() };
+
+        unsafe {
+            core::slice::from_raw_parts_mut(
+                (vaddr.data() + len) as *mut u8,
+                MMArch::PAGE_SIZE - len,
+            )
+            .fill(0)
+        };
+    }
+}
+
+impl Drop for InnerPage {
+    fn drop(&mut self) {
+        assert!(
+            self.map_count() == 0,
+            "page drop when map count is non-zero"
+        );
+
+        unsafe {
+            deallocate_page_frames(PhysPageFrame::new(self.phys_addr), PageFrameCount::new(1))
+        };
+    }
+}
+
+/// 页面类型,包含额外的页面信息
+#[derive(Debug, Clone)]
+pub enum PageType {
+    /// 普通页面,不含额外信息
+    Normal,
+    /// 文件映射页,含文件映射相关信息
+    File(FileMapInfo),
+    /// 共享内存页,记录ShmId
+    Shm(ShmId),
+}
+
+#[derive(Debug, Clone)]
+pub struct FileMapInfo {
+    pub page_cache: Arc<PageCache>,
+    /// 在pagecache中的偏移
+    pub index: usize,
 }
 
 #[derive(Debug)]
@@ -634,23 +852,7 @@ impl<Arch: MemoryManagementArch> PageTable<Arch> {
                             let phys = allocator.allocate_one()?;
                             let mut page_manager_guard = page_manager_lock_irqsave();
                             let old_phys = entry.address().unwrap();
-                            let old_page = page_manager_guard.get_unwrap(&old_phys);
-                            let new_page =
-                                Arc::new(Page::new(old_page.read_irqsave().shared(), phys));
-                            if let Some(ref page_cache) = old_page.read_irqsave().page_cache() {
-                                new_page.write_irqsave().set_page_cache_index(
-                                    Some(page_cache.clone()),
-                                    old_page.read_irqsave().index(),
-                                );
-                            }
-
-                            page_manager_guard.insert(phys, &new_page);
-                            let old_phys = entry.address().unwrap();
-                            let frame = MMArch::phys_2_virt(phys).unwrap().data() as *mut u8;
-                            frame.copy_from_nonoverlapping(
-                                MMArch::phys_2_virt(old_phys).unwrap().data() as *mut u8,
-                                MMArch::PAGE_SIZE,
-                            );
+                            page_manager_guard.copy_page(&old_phys, allocator).ok()?;
                             new_table.set_entry(i, PageEntry::new(phys, entry.flags()));
                         }
                     }
@@ -1180,21 +1382,17 @@ impl<Arch: MemoryManagementArch, F: FrameAllocator> PageMapper<Arch, F> {
         virt: VirtAddr,
         flags: EntryFlags<Arch>,
     ) -> Option<PageFlush<Arch>> {
-        compiler_fence(Ordering::SeqCst);
-        let phys: PhysAddr = self.frame_allocator.allocate_one()?;
-        compiler_fence(Ordering::SeqCst);
-
-        unsafe {
-            let vaddr = MMArch::phys_2_virt(phys).unwrap();
-            MMArch::write_bytes(vaddr, 0, MMArch::PAGE_SIZE);
-        }
-
         let mut page_manager_guard: SpinLockGuard<'static, PageManager> =
             page_manager_lock_irqsave();
-        if !page_manager_guard.contains(&phys) {
-            page_manager_guard.insert(phys, &Arc::new(Page::new(false, phys)))
-        }
+        let page = page_manager_guard
+            .create_one_page(
+                PageType::Normal,
+                PageFlags::empty(),
+                &mut self.frame_allocator,
+            )
+            .ok()?;
         drop(page_manager_guard);
+        let phys = page.phys_address();
         return self.map_phys(virt, phys, flags);
     }
 

+ 34 - 18
kernel/src/mm/ucontext.rs

@@ -21,6 +21,7 @@ use crate::{
     arch::{mm::PageMapper, CurrentIrqArch, MMArch},
     exception::InterruptArch,
     filesystem::vfs::file::File,
+    ipc::shm::{shm_manager_lock, ShmFlags},
     libs::{
         align::page_align_up,
         rwlock::RwLock,
@@ -35,7 +36,7 @@ use super::{
     allocator::page_frame::{
         deallocate_page_frames, PageFrameCount, PhysPageFrame, VirtPageFrame, VirtPageFrameIter,
     },
-    page::{EntryFlags, Flusher, InactiveFlusher, Page, PageFlushAll},
+    page::{EntryFlags, Flusher, InactiveFlusher, PageFlushAll, PageType},
     syscall::{MadvFlags, MapFlags, MremapFlags, ProtFlags},
     MemoryManagementArch, PageTableKind, VirtAddr, VirtRegion, VmFlags,
 };
@@ -841,7 +842,6 @@ impl Drop for UserMapper {
             deallocate_page_frames(
                 PhysPageFrame::new(self.utable.table().phys()),
                 PageFrameCount::new(1),
-                &mut page_manager_lock_irqsave(),
             )
         };
     }
@@ -1152,12 +1152,35 @@ impl LockedVMA {
 
     pub fn unmap(&self, mapper: &mut PageMapper, mut flusher: impl Flusher<MMArch>) {
         // todo: 如果当前vma与文件相关,完善文件相关的逻辑
-
         let mut guard = self.lock_irqsave();
 
         // 获取物理页的anon_vma的守卫
         let mut page_manager_guard: SpinLockGuard<'_, crate::mm::page::PageManager> =
             page_manager_lock_irqsave();
+
+        // 获取映射的物理地址
+        if let Some((paddr, _flags)) = mapper.translate(guard.region().start()) {
+            // 如果是共享页,执行释放操作
+            let page = page_manager_guard.get(&paddr).unwrap();
+            let page_guard = page.read_irqsave();
+            if let PageType::Shm(shm_id) = page_guard.page_type() {
+                let mut shm_manager_guard = shm_manager_lock();
+                if let Some(kernel_shm) = shm_manager_guard.get_mut(shm_id) {
+                    // 更新最后一次断开连接时间
+                    kernel_shm.update_dtim();
+
+                    // 映射计数减少
+                    kernel_shm.decrease_count();
+
+                    // 释放shm_id
+                    if kernel_shm.map_count() == 0 && kernel_shm.mode().contains(ShmFlags::SHM_DEST)
+                    {
+                        shm_manager_guard.free_id(shm_id);
+                    }
+                }
+            }
+        }
+
         for page in guard.region.pages() {
             if mapper.translate(page.virt_address()).is_none() {
                 continue;
@@ -1167,18 +1190,13 @@ impl LockedVMA {
 
             // 从anon_vma中删除当前VMA
             let page = page_manager_guard.get_unwrap(&paddr);
-            page.write_irqsave().remove_vma(self);
-
-            // 如果物理页的anon_vma链表长度为0并且不是共享页,则释放物理页.
-            if page.read_irqsave().can_deallocate() {
-                unsafe {
-                    drop(page);
-                    deallocate_page_frames(
-                        PhysPageFrame::new(paddr),
-                        PageFrameCount::new(1),
-                        &mut page_manager_guard,
-                    )
-                };
+            let mut page_guard = page.write_irqsave();
+            page_guard.remove_vma(self);
+
+            // 如果物理页的vma链表长度为0并且未标记为不可回收,则释放物理页.
+            // TODO 后续由lru释放物理页面
+            if page_guard.can_deallocate() {
+                page_manager_guard.remove_page(&paddr);
             }
 
             flusher.consume(flush);
@@ -1659,9 +1677,7 @@ impl VMA {
         return Ok(r);
     }
 
-    pub fn page_address(&self, page: &Arc<Page>) -> Result<VirtAddr, SystemError> {
-        let page_guard = page.read_irqsave();
-        let index = page_guard.index().unwrap();
+    pub fn page_address(&self, index: usize) -> Result<VirtAddr, SystemError> {
         if index >= self.file_pgoff.unwrap() {
             let address =
                 self.region.start + ((index - self.file_pgoff.unwrap()) << MMArch::PAGE_SHIFT);

+ 10 - 4
kernel/src/net/event_poll/mod.rs

@@ -476,8 +476,8 @@ impl EventPoll {
                 }
 
                 // 如果有未处理的信号则返回错误
-                if current_pcb.sig_info_irqsave().sig_pending().signal().bits() != 0 {
-                    return Err(SystemError::EINTR);
+                if current_pcb.has_pending_signal_fast() {
+                    return Err(SystemError::ERESTARTSYS);
                 }
 
                 // 还未等待到事件发生,则睡眠
@@ -488,12 +488,18 @@ impl EventPoll {
                     let jiffies = next_n_us_timer_jiffies(
                         (timespec.tv_sec * 1000000 + timespec.tv_nsec / 1000) as u64,
                     );
-                    let inner = Timer::new(handle, jiffies);
+                    let inner: Arc<Timer> = Timer::new(handle, jiffies);
                     inner.activate();
                     timer = Some(inner);
                 }
                 let guard = epoll.0.lock_irqsave();
-                unsafe { guard.epoll_wq.sleep_without_schedule() };
+                // 睡眠,等待事件发生
+                // 如果wq已经dead,则直接返回错误
+                unsafe { guard.epoll_wq.sleep_without_schedule() }.inspect_err(|_| {
+                    if let Some(timer) = timer.as_ref() {
+                        timer.cancel();
+                    }
+                })?;
                 drop(guard);
                 schedule(SchedMode::SM_NONE);
                 // 被唤醒后,检查是否有事件可读

+ 2 - 2
kernel/src/net/event_poll/syscall.rs

@@ -3,7 +3,7 @@ use system_error::SystemError;
 use crate::{
     arch::ipc::signal::SigSet,
     filesystem::vfs::file::FileMode,
-    ipc::signal::set_current_sig_blocked,
+    ipc::signal::set_current_blocked,
     mm::VirtAddr,
     syscall::{
         user_access::{UserBufferReader, UserBufferWriter},
@@ -96,7 +96,7 @@ impl Syscall {
         sigmask: &mut SigSet,
     ) -> Result<usize, SystemError> {
         // 设置屏蔽的信号
-        set_current_sig_blocked(sigmask);
+        set_current_blocked(sigmask);
 
         let wait_ret = Self::epoll_wait(epfd, epoll_event, max_events, timespec);
 

+ 68 - 60
kernel/src/perf/bpf.rs

@@ -1,14 +1,15 @@
 use super::{PerfEventOps, Result};
 use crate::arch::mm::LockedFrameAllocator;
 use crate::arch::MMArch;
-use crate::filesystem::vfs::file::PageCache;
+use crate::filesystem::page_cache::PageCache;
 use crate::filesystem::vfs::{FilePrivateData, FileSystem, IndexNode};
 use crate::include::bindings::linux_bpf::{
     perf_event_header, perf_event_mmap_page, perf_event_type,
 };
+use crate::libs::align::page_align_up;
 use crate::libs::spinlock::{SpinLock, SpinLockGuard};
-use crate::mm::allocator::page_frame::{FrameAllocator, PageFrameCount, PhysPageFrame};
-use crate::mm::page::{page_manager_lock_irqsave, Page};
+use crate::mm::allocator::page_frame::{PageFrameCount, PhysPageFrame};
+use crate::mm::page::{page_manager_lock_irqsave, PageFlags, PageType};
 use crate::mm::{MemoryManagementArch, PhysAddr};
 use crate::perf::util::{LostSamples, PerfProbeArgs, PerfSample, SampleHeader};
 use alloc::string::String;
@@ -79,54 +80,28 @@ impl RingPage {
         }
     }
 
+    #[inline]
     fn can_write(&self, data_size: usize, data_tail: usize, data_head: usize) -> bool {
-        if (data_head + 1) % self.data_region_size == data_tail {
-            // The buffer is full
-            return false;
-        }
-        let capacity = if data_head >= data_tail {
-            self.data_region_size - data_head + data_tail
-        } else {
-            data_tail - data_head
-        };
+        let capacity = self.data_region_size - data_head + data_tail;
         data_size <= capacity
     }
 
     pub fn write_event(&mut self, data: &[u8]) -> Result<()> {
         let data_tail = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_tail };
         let data_head = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_head };
-        // data_tail..data_head is the region that can be written
-        // check if there is enough space to write the event
-        let sample_size = PerfSample::calculate_size(data.len());
 
-        let can_write_sample =
-            self.can_write(sample_size, *data_tail as usize, *data_head as usize);
-        // log::error!(
-        //     "can_write_sample: {}, data_tail: {}, data_head: {}, data.len(): {}, region_size: {}",
-        //     can_write_sample,
-        //     *data_tail,
-        //     *data_head,
-        //     data.len(),
-        //     self.data_region_size
-        // );
-        if !can_write_sample {
-            //we need record it to the lost record
-            self.lost += 1;
-            // log::error!(
-            //     "Lost record: {}, data_tail: {}, data_head: {}",
-            //     self.lost,
-            //     *data_tail,
-            //     *data_head
-            // );
-            Ok(())
-        } else {
-            // we can write the sample to the page
-            // If the lost record is not zero, we need to write the lost record first.
+        // The user-space library updates data_tail after reading the data, but it does not
+        // reduce it modulo data_region_size.
+        let perf_header_size = size_of::<perf_event_header>();
+        let can_write_perf_header =
+            self.can_write(perf_header_size, *data_tail as usize, *data_head as usize);
+
+        if can_write_perf_header {
             let can_write_lost_record = self.can_write(
                 size_of::<LostSamples>(),
                 *data_tail as usize,
                 *data_head as usize,
             );
+            // if there is lost record, we need to write the lost record first
             if self.lost > 0 && can_write_lost_record {
                 let new_data_head = self.write_lost(*data_head as usize)?;
                 *data_head = new_data_head as u64;
@@ -137,8 +112,21 @@ impl RingPage {
                 //     *data_head
                 // );
                 self.lost = 0;
-                self.write_event(data)
-            } else {
+                // try to write the event again
+                return self.write_event(data);
+            }
+            let sample_size = PerfSample::calculate_size(data.len());
+            let can_write_sample =
+                self.can_write(sample_size, *data_tail as usize, *data_head as usize);
+            // log::error!(
+            //     "can_write_sample: {}, data_tail: {}, data_head: {}, data.len(): {}, region_size: {}",
+            //     can_write_sample,
+            //     *data_tail,
+            //     *data_head,
+            //     data.len(),
+            //     self.data_region_size
+            // );
+            if can_write_sample {
                 let new_data_head = self.write_sample(data, *data_head as usize)?;
                 *data_head = new_data_head as u64;
                 // log::info!(
@@ -146,20 +134,24 @@ impl RingPage {
                 //     *data_tail,
                 //     *data_head
                 // );
-                Ok(())
+            } else {
+                self.lost += 1;
             }
+        } else {
+            self.lost += 1;
         }
+        Ok(())
     }
 
     /// Write any data to the page.
     ///
     /// Return the new data_head
-    fn write_any(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
+    fn write_any(&mut self, data: &[u8], data_head: usize) -> Result<()> {
         let data_region_len = self.data_region_size;
         let data_region = self.as_mut_slice()[PAGE_SIZE..].as_mut();
         let data_len = data.len();
+        let start = data_head % data_region_len;
         let end = (data_head + data_len) % data_region_len;
-        let start = data_head;
         if start < end {
             data_region[start..end].copy_from_slice(data);
         } else {
@@ -167,40 +159,57 @@ impl RingPage {
             data_region[start..start + first_len].copy_from_slice(&data[..first_len]);
             data_region[0..end].copy_from_slice(&data[first_len..]);
         }
-        Ok(end)
+        Ok(())
+    }
+    #[inline]
+    fn fill_size(&self, data_head_mod: usize) -> usize {
+        if self.data_region_size - data_head_mod < size_of::<perf_event_header>() {
+            // The remaining space is not enough to write the perf_event_header
+            // We need to fill the remaining space with 0
+            self.data_region_size - data_head_mod
+        } else {
+            0
+        }
     }
 
     /// Write a sample to the page.
     fn write_sample(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
+        let sample_size = PerfSample::calculate_size(data.len());
+        let maybe_end = (data_head + sample_size) % self.data_region_size;
+        let fill_size = self.fill_size(maybe_end);
         let perf_sample = PerfSample {
             s_hdr: SampleHeader {
                 header: perf_event_header {
                     type_: perf_event_type::PERF_RECORD_SAMPLE as u32,
                     misc: 0,
-                    size: size_of::<SampleHeader>() as u16 + data.len() as u16,
+                    size: size_of::<SampleHeader>() as u16 + data.len() as u16 + fill_size as u16,
                 },
                 size: data.len() as u32,
             },
             value: data,
         };
-        let new_head = self.write_any(perf_sample.s_hdr.as_bytes(), data_head)?;
-        self.write_any(perf_sample.value, new_head)
+        self.write_any(perf_sample.s_hdr.as_bytes(), data_head)?;
+        self.write_any(perf_sample.value, data_head + size_of::<SampleHeader>())?;
+        Ok(data_head + sample_size + fill_size)
     }
 
     /// Write a lost record to the page.
     ///
     /// Return the new data_head
     fn write_lost(&mut self, data_head: usize) -> Result<usize> {
+        let maybe_end = (data_head + size_of::<LostSamples>()) % self.data_region_size;
+        let fill_size = self.fill_size(maybe_end);
         let lost = LostSamples {
             header: perf_event_header {
                 type_: perf_event_type::PERF_RECORD_LOST as u32,
                 misc: 0,
-                size: size_of::<LostSamples>() as u16,
+                size: size_of::<LostSamples>() as u16 + fill_size as u16,
             },
             id: 0,
             count: self.lost as u64,
         };
-        self.write_any(lost.as_bytes(), data_head)
+        self.write_any(lost.as_bytes(), data_head)?;
+        Ok(data_head + size_of::<LostSamples>() + fill_size)
     }
 
     pub fn readable(&self) -> bool {
@@ -232,18 +241,17 @@ impl BpfPerfEvent {
     }
     pub fn do_mmap(&self, _start: usize, len: usize, offset: usize) -> Result<()> {
         let mut data = self.data.lock();
-        // alloc page frame
-        let (phy_addr, page_count) =
-            unsafe { LockedFrameAllocator.allocate(PageFrameCount::new(len / PAGE_SIZE)) }
-                .ok_or(SystemError::ENOSPC)?;
         let mut page_manager_guard = page_manager_lock_irqsave();
-        let mut cur_phys = PhysPageFrame::new(phy_addr);
-        for i in 0..page_count.data() {
-            let page = Arc::new(Page::new(true, cur_phys.phys_address()));
-            let paddr = cur_phys.phys_address();
-            page_manager_guard.insert(paddr, &page);
-            data.page_cache.add_page(i, &page);
-            cur_phys = cur_phys.next();
+        let (phy_addr, pages) = page_manager_guard.create_pages(
+            PageType::Normal,
+            PageFlags::PG_UNEVICTABLE,
+            &mut LockedFrameAllocator,
+            PageFrameCount::new(page_align_up(len) / PAGE_SIZE),
+        )?;
+        for i in 0..pages.len() {
+            data.page_cache
+                .lock_irqsave()
+                .add_page(i, pages.get(i).unwrap());
         }
         let virt_addr = unsafe { MMArch::phys_2_virt(phy_addr) }.ok_or(SystemError::EFAULT)?;
         // create mmap page

+ 10 - 7
kernel/src/perf/kprobe.rs

@@ -5,7 +5,8 @@ use crate::bpf::helper::BPF_HELPER_FUN_SET;
 use crate::bpf::prog::BpfProg;
 use crate::debug::kprobe::args::KprobeInfo;
 use crate::debug::kprobe::{register_kprobe, unregister_kprobe, LockKprobe};
-use crate::filesystem::vfs::file::{File, PageCache};
+use crate::filesystem::page_cache::PageCache;
+use crate::filesystem::vfs::file::File;
 use crate::filesystem::vfs::{FilePrivateData, FileSystem, IndexNode};
 use crate::libs::casting::DowncastArc;
 use crate::libs::spinlock::SpinLockGuard;
@@ -39,8 +40,10 @@ impl KprobePerfEvent {
             .downcast_arc::<BpfProg>()
             .ok_or(SystemError::EINVAL)?;
         let prog_slice = file.insns();
-        let mut vm =
-            EbpfVmRawOwned::new(Some(prog_slice.to_vec())).map_err(|_| SystemError::EINVAL)?;
+        let mut vm = EbpfVmRawOwned::new(Some(prog_slice.to_vec())).map_err(|e| {
+            log::error!("create ebpf vm failed: {:?}", e);
+            SystemError::EINVAL
+        })?;
         vm.register_helper_set(BPF_HELPER_FUN_SET.get())
             .map_err(|_| SystemError::EINVAL)?;
         // create a callback to execute the ebpf prog
@@ -75,10 +78,10 @@ impl CallBackFunc for KprobePerfCallBack {
                 size_of::<KProbeContext>(),
             )
         };
-        let _res = self
-            .vm
-            .execute_program(probe_context)
-            .map_err(|_| SystemError::EINVAL);
+        let res = self.vm.execute_program(probe_context);
+        if res.is_err() {
+            log::error!("kprobe callback error: {:?}", res);
+        }
     }
 }
 

+ 2 - 1
kernel/src/perf/mod.rs

@@ -2,7 +2,8 @@ mod bpf;
 mod kprobe;
 mod util;
 
-use crate::filesystem::vfs::file::{File, FileMode, PageCache};
+use crate::filesystem::page_cache::PageCache;
+use crate::filesystem::vfs::file::{File, FileMode};
 use crate::filesystem::vfs::syscall::ModeType;
 use crate::filesystem::vfs::{
     FilePrivateData, FileSystem, FileType, FsInfo, IndexNode, Metadata, SuperBlock,

+ 63 - 37
kernel/src/process/exit.rs

@@ -5,13 +5,10 @@ use log::warn;
 use system_error::SystemError;
 
 use crate::{
-    arch::{
-        ipc::signal::{SigChildCode, Signal},
-        CurrentIrqArch,
-    },
-    exception::InterruptArch,
+    arch::ipc::signal::{SigChildCode, Signal},
     sched::{schedule, SchedMode},
     syscall::user_access::UserBufferWriter,
+    time::{sleep::nanosleep, Duration},
 };
 
 use super::{
@@ -108,33 +105,37 @@ pub fn kernel_wait4(
 /// 参考 https://code.dragonos.org.cn/xref/linux-6.1.9/kernel/exit.c#1573
 fn do_wait(kwo: &mut KernelWaitOption) -> Result<usize, SystemError> {
     let mut retval: Result<usize, SystemError>;
-    // todo: 在signal struct里面增加等待队列,并在这里初始化子进程退出的回调,使得子进程退出时,能唤醒当前进程。
-
-    loop {
-        kwo.no_task_error = Some(SystemError::ECHILD);
-        let child_pcb = ProcessManager::find(kwo.pid).ok_or(SystemError::ECHILD);
-        if kwo.pid_type != PidType::MAX && child_pcb.is_err() {
+    let mut tmp_child_pcb: Option<Arc<ProcessControlBlock>> = None;
+    macro_rules! notask {
+        ($outer: lifetime) => {
             if let Some(err) = &kwo.no_task_error {
                 retval = Err(err.clone());
             } else {
                 retval = Ok(0);
             }
 
-            if !kwo.options.contains(WaitOption::WNOHANG) {
+            if retval.is_err() && !kwo.options.contains(WaitOption::WNOHANG) {
                 retval = Err(SystemError::ERESTARTSYS);
-                if !ProcessManager::current_pcb()
-                    .sig_info_irqsave()
-                    .sig_pending()
-                    .has_pending()
-                {
+                if !ProcessManager::current_pcb().has_pending_signal_fast() {
+                    schedule(SchedMode::SM_PREEMPT);
                     // todo: 增加子进程退出的回调后,这里可以直接等待在自身的child_wait等待队列上。
                     continue;
                 } else {
-                    break;
+                    break $outer;
                 }
             } else {
-                break;
+                break $outer;
             }
+        };
+    }
+    // todo: 在signal struct里面增加等待队列,并在这里初始化子进程退出的回调,使得子进程退出时,能唤醒当前进程。
+
+    'outer: loop {
+        kwo.no_task_error = Some(SystemError::ECHILD);
+        let child_pcb = ProcessManager::find(kwo.pid).ok_or(SystemError::ECHILD);
+
+        if kwo.pid_type != PidType::MAX && child_pcb.is_err() {
+            notask!('outer);
         }
 
         if kwo.pid_type == PidType::PID {
@@ -143,37 +144,62 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result<usize, SystemError> {
             let child_weak = Arc::downgrade(&child_pcb);
             let r = do_waitpid(child_pcb, kwo);
             if let Some(r) = r {
-                return r;
-            } else {
-                child_weak.upgrade().unwrap().wait_queue.sleep();
+                retval = r;
+                break 'outer;
+            } else if let Err(SystemError::ESRCH) = child_weak.upgrade().unwrap().wait_queue.sleep()
+            {
+                // log::debug!("do_wait: child_pcb sleep failed");
+                continue;
             }
         } else if kwo.pid_type == PidType::MAX {
             // 等待任意子进程
-            // todo: 这里有问题!如果正在for循环的过程中,子进程退出了,可能会导致父进程永远等待。
+            // todo: 这里有问题!应当让当前进程sleep到自身的child_wait等待队列上,这样才高效。(还没实现)
             let current_pcb = ProcessManager::current_pcb();
-            let rd_childen = current_pcb.children.read();
-            let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
-            for pid in rd_childen.iter() {
-                let pcb = ProcessManager::find(*pid).ok_or(SystemError::ECHILD)?;
-                let state = pcb.sched_info().inner_lock_read_irqsave().state();
-                if state.is_exited() {
-                    kwo.ret_status = state.exit_code().unwrap() as i32;
-                    drop(pcb);
-                    unsafe { ProcessManager::release(*pid) };
-                    return Ok((*pid).into());
-                } else {
-                    unsafe { pcb.wait_queue.sleep_without_schedule() };
+            loop {
+                let rd_childen = current_pcb.children.read();
+                if rd_childen.is_empty() {
+                    break;
+                }
+                for pid in rd_childen.iter() {
+                    let pcb = ProcessManager::find(*pid).ok_or(SystemError::ECHILD)?;
+                    let sched_guard = pcb.sched_info().inner_lock_read_irqsave();
+                    let state = sched_guard.state();
+                    if state.is_exited() {
+                        kwo.ret_status = state.exit_code().unwrap() as i32;
+                        kwo.no_task_error = None;
+                        // 由于pcb的drop方法里面要获取父进程的children字段的写锁,所以这里不能直接drop pcb,
+                        // 而是要先break到外层循环,以便释放父进程的children字段的锁,才能drop pcb。
+                        // 否则会死锁。
+                        tmp_child_pcb = Some(pcb.clone());
+                        unsafe { ProcessManager::release(*pid) };
+                        retval = Ok((*pid).into());
+                        break 'outer;
+                    }
                 }
+                nanosleep(Duration::from_millis(100).into())?;
             }
-            drop(irq_guard);
-            schedule(SchedMode::SM_NONE);
         } else {
             // todo: 对于pgid的处理
             warn!("kernel_wait4: currently not support {:?}", kwo.pid_type);
             return Err(SystemError::EINVAL);
         }
+
+        notask!('outer);
     }
 
+    drop(tmp_child_pcb);
+    ProcessManager::current_pcb()
+        .sched_info
+        .inner_lock_write_irqsave()
+        .set_state(ProcessState::Runnable);
+
+    // log::debug!(
+    //     "do_wait, kwo.pid: {}, retval = {:?}, kwo: {:?}",
+    //     kwo.pid,
+    //     retval,
+    //     kwo.no_task_error
+    // );
+
     return retval;
 }
 

+ 136 - 51
kernel/src/process/mod.rs

@@ -388,55 +388,68 @@ impl ProcessManager {
     /// - `exit_code` : 进程的退出码
     pub fn exit(exit_code: usize) -> ! {
         // 关中断
-        let _guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
-        let pcb = ProcessManager::current_pcb();
-        let pid = pcb.pid();
-        pcb.sched_info
-            .inner_lock_write_irqsave()
-            .set_state(ProcessState::Exited(exit_code));
-        pcb.wait_queue.wakeup(Some(ProcessState::Blocked(true)));
-
-        let rq = cpu_rq(smp_get_processor_id().data() as usize);
-        let (rq, guard) = rq.self_lock();
-        rq.deactivate_task(
-            pcb.clone(),
-            DequeueFlag::DEQUEUE_SLEEP | DequeueFlag::DEQUEUE_NOCLOCK,
-        );
-        drop(guard);
+        let _irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
+        let pid: Pid;
+        {
+            let pcb = ProcessManager::current_pcb();
+            pid = pcb.pid();
+            pcb.sched_info
+                .inner_lock_write_irqsave()
+                .set_state(ProcessState::Exited(exit_code));
+            pcb.wait_queue.mark_dead();
+            pcb.wait_queue.wakeup_all(Some(ProcessState::Blocked(true)));
+
+            let rq = cpu_rq(smp_get_processor_id().data() as usize);
+            let (rq, guard) = rq.self_lock();
+            rq.deactivate_task(
+                pcb.clone(),
+                DequeueFlag::DEQUEUE_SLEEP | DequeueFlag::DEQUEUE_NOCLOCK,
+            );
+            drop(guard);
 
-        // 进行进程退出后的工作
-        let thread = pcb.thread.write_irqsave();
-        if let Some(addr) = thread.set_child_tid {
-            unsafe { clear_user(addr, core::mem::size_of::<i32>()).expect("clear tid failed") };
-        }
+            // 进行进程退出后的工作
+            let thread = pcb.thread.write_irqsave();
+            if let Some(addr) = thread.set_child_tid {
+                unsafe { clear_user(addr, core::mem::size_of::<i32>()).expect("clear tid failed") };
+            }
 
-        if let Some(addr) = thread.clear_child_tid {
-            if Arc::strong_count(&pcb.basic().user_vm().expect("User VM Not found")) > 1 {
-                let _ =
-                    Futex::futex_wake(addr, FutexFlag::FLAGS_MATCH_NONE, 1, FUTEX_BITSET_MATCH_ANY);
+            if let Some(addr) = thread.clear_child_tid {
+                if Arc::strong_count(&pcb.basic().user_vm().expect("User VM Not found")) > 1 {
+                    let _ = Futex::futex_wake(
+                        addr,
+                        FutexFlag::FLAGS_MATCH_NONE,
+                        1,
+                        FUTEX_BITSET_MATCH_ANY,
+                    );
+                }
+                unsafe { clear_user(addr, core::mem::size_of::<i32>()).expect("clear tid failed") };
             }
-            unsafe { clear_user(addr, core::mem::size_of::<i32>()).expect("clear tid failed") };
-        }
 
-        RobustListHead::exit_robust_list(pcb.clone());
+            RobustListHead::exit_robust_list(pcb.clone());
 
-        // 如果是vfork出来的进程,则需要处理completion
-        if thread.vfork_done.is_some() {
-            thread.vfork_done.as_ref().unwrap().complete_all();
-        }
-        drop(thread);
-        unsafe { pcb.basic_mut().set_user_vm(None) };
+            // 如果是vfork出来的进程,则需要处理completion
+            if thread.vfork_done.is_some() {
+                thread.vfork_done.as_ref().unwrap().complete_all();
+            }
+            drop(thread);
+            unsafe { pcb.basic_mut().set_user_vm(None) };
+            pcb.exit_files();
+
+            // TODO 由于未实现进程组,tty记录的前台进程组等于当前进程,故退出前要置空
+            // 后续相关逻辑需要在SYS_EXIT_GROUP系统调用中实现
+            if let Some(tty) = pcb.sig_info_irqsave().tty() {
+                // 临时解决方案!!! 临时解决方案!!! 引入进程组之后,要重写这个更新前台进程组的逻辑
+                let mut g = tty.core().contorl_info_irqsave();
+                if g.pgid == Some(pid) {
+                    g.pgid = None;
+                }
+            }
+            pcb.sig_info_mut().set_tty(None);
 
-        // TODO 由于未实现进程组,tty记录的前台进程组等于当前进程,故退出前要置空
-        // 后续相关逻辑需要在SYS_EXIT_GROUP系统调用中实现
-        if let Some(tty) = pcb.sig_info_irqsave().tty() {
-            tty.core().contorl_info_irqsave().pgid = None;
+            drop(pcb);
+            ProcessManager::exit_notify();
         }
-        pcb.sig_info_mut().set_tty(None);
 
-        drop(pcb);
-        ProcessManager::exit_notify();
-        // unsafe { CurrentIrqArch::interrupt_enable() };
         __schedule(SchedMode::SM_NONE);
         error!("pid {pid:?} exited but sched again!");
         #[allow(clippy::empty_loop)]
@@ -611,6 +624,32 @@ bitflags! {
         const NEED_MIGRATE = 1 << 7;
         /// 随机化的虚拟地址空间,主要用于动态链接器的加载
         const RANDOMIZE = 1 << 8;
+        /// 进程有未处理的信号(这是一个用于快速判断的标志位)
+        /// 相当于Linux的TIF_SIGPENDING
+        const HAS_PENDING_SIGNAL = 1 << 9;
+        /// 进程需要恢复之前保存的信号掩码
+        const RESTORE_SIG_MASK = 1 << 10;
+    }
+}
+
+impl ProcessFlags {
+    pub const fn exit_to_user_mode_work(&self) -> Self {
+        Self::from_bits_truncate(self.bits & (Self::HAS_PENDING_SIGNAL.bits))
+    }
+
+    /// 测试并清除标志位
+    ///
+    /// ## 参数
+    ///
+    /// - `rhs` : 需要测试并清除的标志位
+    ///
+    /// ## 返回值
+    ///
+    /// 如果标志位在清除前是置位的,则返回 `true`,否则返回 `false`
+    pub const fn test_and_clear(&mut self, rhs: Self) -> bool {
+        let r = (self.bits & rhs.bits) != 0;
+        self.bits &= !rhs.bits;
+        r
     }
 }
 #[derive(Debug)]
@@ -672,6 +711,7 @@ pub struct ProcessControlBlock {
 
     /// 进程作为主体的凭证集
     cred: SpinLock<Cred>,
+    self_ref: Weak<ProcessControlBlock>,
 }
 
 impl ProcessControlBlock {
@@ -733,7 +773,7 @@ impl ProcessControlBlock {
         let ppcb: Weak<ProcessControlBlock> = ProcessManager::find(ppid)
             .map(|p| Arc::downgrade(&p))
             .unwrap_or_default();
-        let pcb = Self {
+        let mut pcb = Self {
             pid,
             tgid: pid,
             thread_pid: Arc::new(RwLock::new(PidStrcut::new())),
@@ -758,6 +798,7 @@ impl ProcessControlBlock {
             robust_list: RwLock::new(None),
             nsproxy: Arc::new(RwLock::new(NsProxy::new())),
             cred: SpinLock::new(cred),
+            self_ref: Weak::new(),
         };
 
         pcb.sig_info.write().set_tty(tty);
@@ -768,7 +809,10 @@ impl ProcessControlBlock {
             .lock()
             .init_syscall_stack(&pcb.syscall_stack.read());
 
-        let pcb = Arc::new(pcb);
+        let pcb = Arc::new_cyclic(|weak| {
+            pcb.self_ref = weak.clone();
+            pcb
+        });
 
         pcb.sched_info()
             .sched_entity()
@@ -1016,6 +1060,11 @@ impl ProcessControlBlock {
         return has_pending;
     }
 
+    /// 根据 pcb 的 flags 判断当前进程是否有未处理的信号
+    pub fn has_pending_signal_fast(&self) -> bool {
+        self.flags.get().contains(ProcessFlags::HAS_PENDING_SIGNAL)
+    }
+
     pub fn sig_struct(&self) -> SpinLockGuard<SignalStruct> {
         self.sig_struct.lock_irqsave()
     }
@@ -1055,6 +1104,19 @@ impl ProcessControlBlock {
     pub fn set_nsproxy(&self, nsprsy: NsProxy) {
         *self.nsproxy.write() = nsprsy;
     }
+
+    /// Exit fd table when process exit
+    fn exit_files(&self) {
+        self.basic.write_irqsave().set_fd_table(None);
+    }
+
+    pub fn children_read_irqsave(&self) -> RwLockReadGuard<Vec<Pid>> {
+        self.children.read_irqsave()
+    }
+
+    pub fn threads_read_irqsave(&self) -> RwLockReadGuard<ThreadInfo> {
+        self.thread.read_irqsave()
+    }
 }
 
 impl Drop for ProcessControlBlock {
@@ -1086,6 +1148,12 @@ pub struct ThreadInfo {
     group_leader: Weak<ProcessControlBlock>,
 }
 
+impl Default for ThreadInfo {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
 impl ThreadInfo {
     pub fn new() -> Self {
         Self {
@@ -1526,8 +1594,9 @@ pub fn process_init() {
 
 #[derive(Debug)]
 pub struct ProcessSignalInfo {
-    // 当前进程
-    sig_block: SigSet,
+    // 当前进程被屏蔽的信号
+    sig_blocked: SigSet,
+    saved_sigmask: SigSet,
     // sig_pending 中存储当前线程要处理的信号
     sig_pending: SigPending,
     // sig_shared_pending 中存储当前线程所属进程要处理的信号
@@ -1537,8 +1606,8 @@ pub struct ProcessSignalInfo {
 }
 
 impl ProcessSignalInfo {
-    pub fn sig_block(&self) -> &SigSet {
-        &self.sig_block
+    pub fn sig_blocked(&self) -> &SigSet {
+        &self.sig_blocked
     }
 
     pub fn sig_pending(&self) -> &SigPending {
@@ -1550,7 +1619,15 @@ impl ProcessSignalInfo {
     }
 
     pub fn sig_block_mut(&mut self) -> &mut SigSet {
-        &mut self.sig_block
+        &mut self.sig_blocked
+    }
+
+    pub fn saved_sigmask(&self) -> &SigSet {
+        &self.saved_sigmask
+    }
+
+    pub fn saved_sigmask_mut(&mut self) -> &mut SigSet {
+        &mut self.saved_sigmask
     }
 
     pub fn sig_shared_pending_mut(&mut self) -> &mut SigPending {
@@ -1575,12 +1652,19 @@ impl ProcessSignalInfo {
     ///
     /// - `sig_mask` 被忽略掉的信号
     ///
-    pub fn dequeue_signal(&mut self, sig_mask: &SigSet) -> (Signal, Option<SigInfo>) {
+    pub fn dequeue_signal(
+        &mut self,
+        sig_mask: &SigSet,
+        pcb: &Arc<ProcessControlBlock>,
+    ) -> (Signal, Option<SigInfo>) {
         let res = self.sig_pending.dequeue_signal(sig_mask);
+        pcb.recalc_sigpending(Some(self));
         if res.0 != Signal::INVALID {
             return res;
         } else {
-            return self.sig_shared_pending.dequeue_signal(sig_mask);
+            let res = self.sig_shared_pending.dequeue_signal(sig_mask);
+            pcb.recalc_sigpending(Some(self));
+            return res;
         }
     }
 }
@@ -1588,7 +1672,8 @@ impl ProcessSignalInfo {
 impl Default for ProcessSignalInfo {
     fn default() -> Self {
         Self {
-            sig_block: SigSet::empty(),
+            sig_blocked: SigSet::empty(),
+            saved_sigmask: SigSet::empty(),
             sig_pending: SigPending::default(),
             sig_shared_pending: SigPending::default(),
             tty: None,

+ 11 - 8
kernel/src/sched/completion.rs

@@ -4,6 +4,7 @@ use system_error::SystemError;
 
 use crate::{
     libs::{spinlock::SpinLock, wait_queue::WaitQueue},
+    process::ProcessManager,
     time::timer::schedule_timeout,
 };
 
@@ -29,20 +30,22 @@ impl Completion {
     /// @return 返回剩余时间或者SystemError
     fn do_wait_for_common(&self, mut timeout: i64, interuptible: bool) -> Result<i64, SystemError> {
         let mut inner = self.inner.lock_irqsave();
-
+        let pcb = ProcessManager::current_pcb();
         if inner.done == 0 {
             //loop break 类似 do while 保证进行一次信号检测
             loop {
                 //检查当前线程是否有未处理的信号
-                //             if (signal_pending_state(state, current)) {
-                // timeout = -ERESTARTSYS;
-                // break;
-                //}
+                if pcb.sig_info_irqsave().sig_pending().has_pending() {
+                    return Err(SystemError::ERESTARTSYS);
+                }
 
-                if interuptible {
-                    unsafe { inner.wait_queue.sleep_without_schedule() };
+                let e = if interuptible {
+                    unsafe { inner.wait_queue.sleep_without_schedule() }
                 } else {
-                    unsafe { inner.wait_queue.sleep_without_schedule_uninterruptible() };
+                    unsafe { inner.wait_queue.sleep_without_schedule_uninterruptible() }
+                };
+                if e.is_err() {
+                    break;
                 }
                 drop(inner);
                 timeout = schedule_timeout(timeout)?;

+ 11 - 2
kernel/src/syscall/mod.rs

@@ -238,6 +238,10 @@ impl Syscall {
                 let r = args[0] as *const u8;
                 Self::chdir(r)
             }
+            SYS_FCHDIR => {
+                let fd = args[0] as i32;
+                Self::fchdir(fd)
+            }
 
             #[allow(unreachable_patterns)]
             SYS_GETDENTS64 | SYS_GETDENTS => {
@@ -879,8 +883,11 @@ impl Syscall {
             }
 
             SYS_RT_SIGPROCMASK => {
-                warn!("SYS_RT_SIGPROCMASK has not yet been implemented");
-                Ok(0)
+                let how = args[0] as i32;
+                let nset = args[1];
+                let oset = args[2];
+                let sigsetsize = args[3];
+                Self::rt_sigprocmask(how, nset, oset, sigsetsize)
             }
 
             SYS_TKILL => {
@@ -1213,6 +1220,8 @@ impl Syscall {
                 let flags = args[4] as u32;
                 Self::sys_perf_event_open(attr, pid, cpu, group_fd, flags)
             }
+            SYS_SETRLIMIT => Ok(0),
+            SYS_RESTART_SYSCALL => Self::restart_syscall(),
             _ => panic!("Unsupported syscall ID: {}", syscall_num),
         };
 

+ 1 - 1
tools/BUILD_CONTAINER_VERSION

@@ -1 +1 @@
-v1.7
+v1.8

+ 11 - 10
tools/bootstrap.sh

@@ -24,6 +24,7 @@ DEFAULT_INSTALL="false"
 export RUSTUP_DIST_SERVER=${RUSTUP_DIST_SERVER:-https://rsproxy.cn}
 export RUSTUP_UPDATE_ROOT=${RUSTUP_UPDATE_ROOT:-https://rsproxy.cn/rustup}
 export RUST_VERSION="${RUST_VERSION:-nightly-2024-11-05}"
+export RUST_VERSION_OLD="${RUST_VERSION:-nightly-2024-07-23}"
 
 banner()
 {
@@ -65,7 +66,7 @@ install_ubuntu_debian_pkg()
         lsb-release \
         llvm-dev libclang-dev clang gcc-multilib \
         gcc build-essential fdisk dosfstools dnsmasq bridge-utils iptables libssl-dev pkg-config \
-		sphinx make git
+		python3-sphinx make git
 	# 必须分开安装,否则会出现错误
 	sudo "$1" install -y \
 		gcc-riscv64-unknown-elf gcc-riscv64-linux-gnu gdb-multiarch
@@ -233,21 +234,21 @@ rustInstall() {
         echo "正在安装DragonOS所需的rust组件...首次安装需要一些时间来更新索引,请耐心等待..."
         cargo install cargo-binutils
 		cargo install bpf-linker
-		rustup toolchain install nightly-2024-11-05-x86_64-unknown-linux-gnu
 		rustup toolchain install $RUST_VERSION-x86_64-unknown-linux-gnu
+		rustup toolchain install $RUST_VERSION_OLD-x86_64-unknown-linux-gnu
 		rustup component add rust-src --toolchain $RUST_VERSION-x86_64-unknown-linux-gnu
-		rustup component add rust-src --toolchain nightly-2024-11-05-x86_64-unknown-linux-gnu
+		rustup component add rust-src --toolchain $RUST_VERSION_OLD-x86_64-unknown-linux-gnu
 		rustup target add x86_64-unknown-none --toolchain $RUST_VERSION-x86_64-unknown-linux-gnu
-		rustup target add x86_64-unknown-none --toolchain nightly-2024-11-05-x86_64-unknown-linux-gnu
-		rustup target add x86_64-unknown-linux-musl --toolchain nightly-2024-11-05-x86_64-unknown-linux-gnu
+		rustup target add x86_64-unknown-none --toolchain $RUST_VERSION_OLD-x86_64-unknown-linux-gnu
 		rustup target add x86_64-unknown-linux-musl --toolchain $RUST_VERSION-x86_64-unknown-linux-gnu
+		rustup target add x86_64-unknown-linux-musl --toolchain $RUST_VERSION_OLD-x86_64-unknown-linux-gnu
 
 		rustup toolchain install $RUST_VERSION-riscv64gc-unknown-linux-gnu --force-non-host
-		rustup toolchain install nightly-2024-11-05-riscv64gc-unknown-linux-gnu --force-non-host
+		rustup toolchain install $RUST_VERSION_OLD-riscv64gc-unknown-linux-gnu --force-non-host
 		rustup target add riscv64gc-unknown-none-elf --toolchain $RUST_VERSION-riscv64gc-unknown-linux-gnu
 		rustup target add riscv64imac-unknown-none-elf --toolchain $RUST_VERSION-riscv64gc-unknown-linux-gnu
-		rustup target add riscv64gc-unknown-none-elf --toolchain nightly-2024-11-05-riscv64gc-unknown-linux-gnu
-		rustup target add riscv64imac-unknown-none-elf --toolchain nightly-2024-11-05-riscv64gc-unknown-linux-gnu
+		rustup target add riscv64gc-unknown-none-elf --toolchain $RUST_VERSION_OLD-riscv64gc-unknown-linux-gnu
+		rustup target add riscv64imac-unknown-none-elf --toolchain $RUST_VERSION_OLD-riscv64gc-unknown-linux-gnu
         
 		rustup component add rust-src --toolchain nightly-x86_64-unknown-linux-gnu
 		rustup component add rust-src
@@ -338,9 +339,9 @@ cargo install dadk || exit 1
 bashpath=$(cd `dirname $0`; pwd)
 
 # 编译安装musl交叉编译工具链
-bash ${bashpath}/install_musl_gcc.sh || (echo "musl交叉编译工具链安装失败" && exit 1)
+$SHELL ${bashpath}/install_musl_gcc.sh || (echo "musl交叉编译工具链安装失败" && exit 1)
 # 编译安装grub
-bash ${bashpath}/grub_auto_install.sh || (echo "grub安装失败" && exit 1)
+$SHELL ${bashpath}/grub_auto_install.sh || (echo "grub安装失败" && exit 1)
 
 # 解决kvm权限问题
 USR=$USER

+ 1 - 1
tools/build_in_docker.sh

@@ -1,6 +1,6 @@
 docker rm -f dragonos-build || echo "No existed container"
 cpu_count=$(cat /proc/cpuinfo |grep "processor"|wc -l)
-docker run --rm --privileged=true --cap-add SYS_ADMIN --cap-add MKNOD -v $(pwd):/data -v /dev:/dev -v dragonos-build-cargo:/root/.cargo/registry --name dragonos-build -i dragonos/dragonos-dev:v1.2 bash << EOF
+docker run --rm --privileged=true --cap-add SYS_ADMIN --cap-add MKNOD -v $(pwd):/data -v /dev:/dev -v dragonos-build-cargo:/root/.cargo/registry --name dragonos-build -i dragonos/dragonos-dev:v1.8 bash << EOF
 source ~/.cargo/env
 source ~/.bashrc
 cd /data

+ 2 - 1
tools/change_rust_src.sh

@@ -2,7 +2,8 @@ echo "正在为rust换源"
 
 sparse="false"
 
-CONFIG_FILE=~/.cargo/config.toml
+CARGO_HOME=${CARGO_HOME:-~/.cargo}
+CONFIG_FILE=$CARGO_HOME/config.toml
 # 创建父目录
 if [ ! -d ~/.cargo ]; then
     mkdir -p ~/.cargo

+ 3 - 2
tools/run-qemu.sh

@@ -78,7 +78,8 @@ QEMU_MONITOR="-monitor stdio"
 QEMU_TRACE="${qemu_trace_std}"
 QEMU_CPU_FEATURES=""
 QEMU_RTC_CLOCK=""
-QEMU_SERIAL="-serial file:../serial_opt.txt"
+QEMU_SERIAL_LOG_FILE="../serial_opt.txt"
+QEMU_SERIAL="-serial file:${QEMU_SERIAL_LOG_FILE}"
 QEMU_DRIVE="id=disk,file=${QEMU_DISK_IMAGE},if=none"
 QEMU_ACCELARATE=""
 QEMU_ARGUMENT=""
@@ -137,7 +138,7 @@ while true;do
               window)
               ;;
               nographic)
-              QEMU_SERIAL=" -serial mon:stdio "
+              QEMU_SERIAL=" -serial chardev:mux -monitor chardev:mux -chardev stdio,id=mux,mux=on,signal=off,logfile=${QEMU_SERIAL_LOG_FILE} "
               QEMU_MONITOR=""
               QEMU_ARGUMENT+=" --nographic "
               QEMU_ARGUMENT+=" -kernel ../bin/kernel/kernel.elf "

+ 1 - 0
user/apps/test_ebpf/syscall_ebpf/.gitignore → user/apps/syscall_ebpf/.gitignore

@@ -7,3 +7,4 @@ target/
 
 # These are backup files generated by rustfmt
 **/*.rs.bk
+/install/

+ 31 - 0
user/apps/syscall_ebpf/Cargo.toml

@@ -0,0 +1,31 @@
+[workspace]
+resolver = "2"
+members = ["syscall_ebpf", "syscall_ebpf-common", "syscall_ebpf-ebpf"]
+default-members = ["syscall_ebpf", "syscall_ebpf-common"]
+
+[workspace.dependencies]
+aya = { version = "0.13.0", default-features = false }
+aya-ebpf = { version = "0.1.1", default-features = false }
+aya-log = { version = "0.2.1", default-features = false }
+aya-log-ebpf = { version = "0.1.1", default-features = false }
+anyhow = { version = "1", default-features = false }
+cargo_metadata = { version = "0.18.0", default-features = false }
+# `std` feature is currently required to build `clap`.
+#
+# See https://github.com/clap-rs/clap/blob/61f5ee5/clap_builder/src/lib.rs#L15.
+clap = { version = "4.5.20", default-features = false, features = ["std"] }
+env_logger = { version = "0.11.5", default-features = false }
+libc = { version = "0.2.159", default-features = false }
+log = { version = "0.4.22", default-features = false }
+tokio = { version = "1.40.0", default-features = false }
+which = { version = "6.0.0", default-features = false }
+
+[profile.dev]
+panic = "abort"
+
+[profile.release]
+panic = "abort"
+
+[profile.release.package.syscall_ebpf-ebpf]
+debug = 2
+codegen-units = 1

+ 6 - 10
user/apps/test_ebpf/Makefile → user/apps/syscall_ebpf/Makefile

@@ -21,10 +21,10 @@ endif
 run:
 	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) run --target $(RUST_TARGET)
 
-build:build-ebpf
+build:
 	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) build --target $(RUST_TARGET)
 
-clean:clean-ebpf
+clean:
 	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) clean --target $(RUST_TARGET)
 
 test:
@@ -42,20 +42,16 @@ fmt-check:
 run-release:
 	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) run --target $(RUST_TARGET) --release
 
-build-release:build-ebpf
+build-release:
 	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) build --target $(RUST_TARGET) --release
 
-clean-release:clean-ebpf
+clean-release:
 	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) clean --target $(RUST_TARGET) --release
 
 test-release:
 	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) test --target $(RUST_TARGET) --release
 
-build-ebpf:
-	cd ./syscall_ebpf && RUST_LOG=debug cargo xtask build --release
-clean-ebpf:
-	cd ./syscall_ebpf && cargo clean
 
 .PHONY: install
-install:build-ebpf
-	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) install --target $(RUST_TARGET) --path . --no-track --root $(INSTALL_DIR) --force
+install:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) install --target $(RUST_TARGET) --path ./syscall_ebpf --no-track --root $(INSTALL_DIR) --force

+ 33 - 0
user/apps/syscall_ebpf/README.md

@@ -0,0 +1,33 @@
+# syscall_ebpf
+
+## Prerequisites
+
+1. stable rust toolchains: `rustup toolchain install stable`
+1. nightly rust toolchains: `rustup toolchain install nightly --component rust-src`
+1. (if cross-compiling) rustup target: `rustup target add ${ARCH}-unknown-linux-musl`
+1. (if cross-compiling) LLVM: (e.g.) `brew install llvm` (on macOS)
+1. (if cross-compiling) C toolchain: (e.g.) [`brew install filosottile/musl-cross/musl-cross`](https://github.com/FiloSottile/homebrew-musl-cross) (on macOS)
+1. bpf-linker: `cargo install bpf-linker` (`--no-default-features` on macOS)
+
+## Build & Run
+
+Use `cargo build`, `cargo check`, etc. as normal. Run your program with:
+
+```shell
+cargo run --release --config 'target."cfg(all())".runner="sudo -E"'
+```
+
+Cargo build scripts are used to automatically build the eBPF correctly and include it in the
+program.
+
+## Cross-compiling on macOS
+
+Cross compilation should work on both Intel and Apple Silicon Macs.
+
+```shell
+CC=${ARCH}-linux-musl-gcc cargo build --package syscall_ebpf --release \
+  --target=${ARCH}-unknown-linux-musl \
+  --config=target.${ARCH}-unknown-linux-musl.linker=\"${ARCH}-linux-musl-gcc\"
+```
+The cross-compiled program `target/${ARCH}-unknown-linux-musl/release/syscall_ebpf` can be
+copied to a Linux server or VM and run there.

+ 4 - 0
user/apps/syscall_ebpf/rustfmt.toml

@@ -0,0 +1,4 @@
+group_imports = "StdExternalCrate"
+imports_granularity = "Crate"
+reorder_imports = true
+unstable_features = true

+ 1 - 1
user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-common/Cargo.toml → user/apps/syscall_ebpf/syscall_ebpf-common/Cargo.toml

@@ -8,7 +8,7 @@ default = []
 user = ["aya"]
 
 [dependencies]
-aya = { git = "https://git.mirrors.dragonos.org.cn/DragonOS-Community/tiny-aya.git", rev = "0689f13", optional = true }
+aya = { workspace = true, optional = true }
 
 [lib]
 path = "src/lib.rs"

+ 0 - 0
user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-common/src/lib.rs → user/apps/syscall_ebpf/syscall_ebpf-common/src/lib.rs


+ 12 - 0
user/apps/syscall_ebpf/syscall_ebpf-ebpf/.cargo/config.toml

@@ -0,0 +1,12 @@
+# We have this so that one doesn't need to manually pass
+# --target=bpfel-unknown-none -Z build-std=core when running cargo
+# check/build/doc etc.
+#
+# NB: this file gets loaded only if you run cargo from this directory, it's
+# ignored if you run from the workspace root. See
+# https://doc.rust-lang.org/cargo/reference/config.html#hierarchical-structure
+[build]
+target = ["bpfeb-unknown-none", "bpfel-unknown-none"]
+
+[unstable]
+build-std = ["core"]

+ 17 - 0
user/apps/syscall_ebpf/syscall_ebpf-ebpf/Cargo.toml

@@ -0,0 +1,17 @@
+[package]
+name = "syscall_ebpf-ebpf"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+syscall_ebpf-common = { path = "../syscall_ebpf-common" }
+
+aya-ebpf = { workspace = true }
+aya-log-ebpf = { workspace = true }
+
+[build-dependencies]
+which = { workspace = true }
+
+[[bin]]
+name = "syscall_ebpf"
+path = "src/main.rs"

+ 17 - 0
user/apps/syscall_ebpf/syscall_ebpf-ebpf/build.rs

@@ -0,0 +1,17 @@
+use which::which;
+
+/// Building this crate has an undeclared dependency on the `bpf-linker` binary. This would be
+/// better expressed by [artifact-dependencies][bindeps] but issues such as
+/// https://github.com/rust-lang/cargo/issues/12385 make their use impractical for the time being.
+///
+/// This file implements an imperfect solution: it causes cargo to rebuild the crate whenever the
+/// mtime of `which bpf-linker` changes. Note that the possibility that a new bpf-linker is added to
+/// $PATH ahead of the one used as the cache key still exists. Solving this in the general case
+/// would require rebuild-if-changed-env=PATH *and* rebuild-if-changed={every-directory-in-PATH}
+/// which would likely mean far too much cache invalidation.
+///
+/// [bindeps]: https://doc.rust-lang.org/nightly/cargo/reference/unstable.html?highlight=feature#artifact-dependencies
+fn main() {
+    let bpf_linker = which("bpf-linker").unwrap();
+    println!("cargo:rerun-if-changed={}", bpf_linker.to_str().unwrap());
+}

+ 3 - 0
user/apps/syscall_ebpf/syscall_ebpf-ebpf/rust-toolchain.toml

@@ -0,0 +1,3 @@
+[toolchain]
+channel = "nightly"
+components = ["rust-src"]

+ 3 - 0
user/apps/syscall_ebpf/syscall_ebpf-ebpf/src/lib.rs

@@ -0,0 +1,3 @@
+#![no_std]
+
+// This file exists to enable the library target.

+ 50 - 0
user/apps/syscall_ebpf/syscall_ebpf-ebpf/src/main.rs

@@ -0,0 +1,50 @@
+#![no_std]
+#![no_main]
+
+use aya_ebpf::{
+    helpers::bpf_ktime_get_ns,
+    macros::{kprobe, map},
+    maps::HashMap,
+    programs::ProbeContext,
+};
+use aya_log_ebpf::info;
+
+#[kprobe]
+pub fn syscall_ebpf(ctx: ProbeContext) -> u32 {
+    try_syscall_ebpf(ctx).unwrap_or_else(|ret| ret)
+}
+
+fn try_syscall_ebpf(ctx: ProbeContext) -> Result<u32, u32> {
+    let pt_regs = unsafe { &*ctx.regs };
+    // first arg -> rdi
+    // second arg -> rsi
+    // third arg -> rdx
+    // four arg -> rcx
+    let syscall_num = pt_regs.rsi as usize;
+    if syscall_num != 1 {
+        unsafe {
+            if let Some(v) = SYSCALL_LIST.get(&(syscall_num as u32)) {
+                let new_v = *v + 1;
+                SYSCALL_LIST
+                    .insert(&(syscall_num as u32), &new_v, 0)
+                    .unwrap();
+            } else {
+                SYSCALL_LIST.insert(&(syscall_num as u32), &1, 0).unwrap();
+            }
+        }
+        let time = unsafe { bpf_ktime_get_ns() };
+        info!(&ctx, "[{}] invoke syscall {}", time, syscall_num);
+    }
+    Ok(0)
+}
+
+#[map]
+static SYSCALL_LIST: HashMap<u32, u32> = HashMap::<u32, u32>::with_max_entries(1024, 0);
+
+#[cfg(not(test))]
+#[panic_handler]
+fn panic(_info: &core::panic::PanicInfo) -> ! {
+    // we need use this because the verifier will forbid loop
+    unsafe { core::hint::unreachable_unchecked() }
+    // loop{}
+}

+ 35 - 0
user/apps/syscall_ebpf/syscall_ebpf/Cargo.toml

@@ -0,0 +1,35 @@
+[package]
+name = "syscall_ebpf"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+syscall_ebpf-common = { path = "../syscall_ebpf-common", features = ["user"] }
+
+anyhow = { workspace = true, default-features = true }
+aya = { workspace = true }
+aya-log = { workspace = true }
+env_logger = { workspace = true }
+libc = { workspace = true }
+log = { workspace = true }
+tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread", "net", "signal", "time"] }
+
+[build-dependencies]
+cargo_metadata = { workspace = true }
+# TODO(https://github.com/rust-lang/cargo/issues/12375): this should be an artifact dependency, but
+# it's not possible to tell cargo to use `-Z build-std` to build it. We cargo-in-cargo in the build
+# script to build this, but we want to teach cargo about the dependency so that cache invalidation
+# works properly.
+#
+# Note also that https://github.com/rust-lang/cargo/issues/10593 occurs when `target = ...` is added
+# to an artifact dependency; it seems possible to work around that by setting `resolver = "1"` in
+# Cargo.toml in the workspace root.
+#
+# Finally note that *any* usage of `artifact = ...` in *any* Cargo.toml in the workspace breaks
+# workflows with stable cargo; stable cargo outright refuses to load manifests that use unstable
+# features.
+syscall_ebpf-ebpf = { path = "../syscall_ebpf-ebpf" }
+
+[[bin]]
+name = "syscall_ebpf"
+path = "src/main.rs"

+ 150 - 0
user/apps/syscall_ebpf/syscall_ebpf/build.rs

@@ -0,0 +1,150 @@
+use std::{
+    env, fs,
+    io::{BufRead as _, BufReader},
+    path::PathBuf,
+    process::{Child, Command, Stdio},
+};
+
+use cargo_metadata::{
+    Artifact, CompilerMessage, Message, Metadata, MetadataCommand, Package, Target,
+};
+
+/// This crate has a runtime dependency on artifacts produced by the `syscall_ebpf-ebpf` crate.
+/// This would be better expressed as one or more [artifact-dependencies][bindeps] but issues such
+/// as:
+///
+/// * https://github.com/rust-lang/cargo/issues/12374
+/// * https://github.com/rust-lang/cargo/issues/12375
+/// * https://github.com/rust-lang/cargo/issues/12385
+///
+/// prevent their use for the time being.
+///
+/// [bindeps]: https://doc.rust-lang.org/nightly/cargo/reference/unstable.html?highlight=feature#artifact-dependencies
+fn main() {
+    let Metadata { packages, .. } = MetadataCommand::new().no_deps().exec().unwrap();
+    let ebpf_package = packages
+        .into_iter()
+        .find(|Package { name, .. }| name == "syscall_ebpf-ebpf")
+        .unwrap();
+
+    let out_dir = env::var_os("OUT_DIR").unwrap();
+    let out_dir = PathBuf::from(out_dir);
+
+    let endian = env::var_os("CARGO_CFG_TARGET_ENDIAN").unwrap();
+    let target = if endian == "big" {
+        "bpfeb"
+    } else if endian == "little" {
+        "bpfel"
+    } else {
+        panic!("unsupported endian={:?}", endian)
+    };
+
+    // TODO(https://github.com/rust-lang/cargo/issues/4001): Make this `false` if we can determine
+    // we're in a check build.
+    let build_ebpf = true;
+    if build_ebpf {
+        let arch = env::var_os("CARGO_CFG_TARGET_ARCH").unwrap();
+
+        let target = format!("{target}-unknown-none");
+
+        let Package { manifest_path, .. } = ebpf_package;
+        let ebpf_dir = manifest_path.parent().unwrap();
+
+        // We have a build-dependency on `syscall_ebpf-ebpf`, so cargo will automatically rebuild us
+        // if `syscall_ebpf-ebpf`'s *library* target or any of its dependencies change. Since we
+        // depend on `syscall_ebpf-ebpf`'s *binary* targets, that only gets us half of the way. This
+        // stanza ensures cargo will rebuild us on changes to the binaries too, which gets us the
+        // rest of the way.
+        println!("cargo:rerun-if-changed={}", ebpf_dir.as_str());
+
+        let mut cmd = Command::new("cargo");
+        cmd.args([
+            "build",
+            "-Z",
+            "build-std=core",
+            "--bins",
+            "--message-format=json",
+            "--release",
+            "--target",
+            &target,
+        ]);
+
+        cmd.env("CARGO_CFG_BPF_TARGET_ARCH", arch);
+
+        // Workaround to make sure that the rust-toolchain.toml is respected.
+        for key in ["RUSTUP_TOOLCHAIN", "RUSTC", "RUSTC_WORKSPACE_WRAPPER"] {
+            cmd.env_remove(key);
+        }
+        cmd.current_dir(ebpf_dir);
+
+        // Workaround for https://github.com/rust-lang/cargo/issues/6412 where cargo flocks itself.
+        let ebpf_target_dir = out_dir.join("../syscall_ebpf-ebpf");
+        cmd.arg("--target-dir").arg(&ebpf_target_dir);
+
+        let mut child = cmd
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped())
+            .spawn()
+            .unwrap_or_else(|err| panic!("failed to spawn {cmd:?}: {err}"));
+        let Child { stdout, stderr, .. } = &mut child;
+
+        // Trampoline stderr to cargo warnings.
+        let stderr = stderr.take().unwrap();
+        let stderr = BufReader::new(stderr);
+        let stderr = std::thread::spawn(move || {
+            for line in stderr.lines() {
+                let line = line.unwrap();
+                println!("cargo:warning={line}");
+            }
+        });
+
+        let stdout = stdout.take().unwrap();
+        let stdout = BufReader::new(stdout);
+        let mut executables = Vec::new();
+        for message in Message::parse_stream(stdout) {
+            #[allow(clippy::collapsible_match)]
+            match message.expect("valid JSON") {
+                Message::CompilerArtifact(Artifact {
+                    executable,
+                    target: Target { name, .. },
+                    ..
+                }) => {
+                    if let Some(executable) = executable {
+                        executables.push((name, executable.into_std_path_buf()));
+                    }
+                }
+                Message::CompilerMessage(CompilerMessage { message, .. }) => {
+                    for line in message.rendered.unwrap_or_default().split('\n') {
+                        println!("cargo:warning={line}");
+                    }
+                }
+                Message::TextLine(line) => {
+                    println!("cargo:warning={line}");
+                }
+                _ => {}
+            }
+        }
+
+        let status = child
+            .wait()
+            .unwrap_or_else(|err| panic!("failed to wait for {cmd:?}: {err}"));
+        assert_eq!(status.code(), Some(0), "{cmd:?} failed: {status:?}");
+
+        stderr.join().map_err(std::panic::resume_unwind).unwrap();
+
+        for (name, binary) in executables {
+            let dst = out_dir.join(name);
+            let _: u64 = fs::copy(&binary, &dst)
+                .unwrap_or_else(|err| panic!("failed to copy {binary:?} to {dst:?}: {err}"));
+        }
+    } else {
+        let Package { targets, .. } = ebpf_package;
+        for Target { name, kind, .. } in targets {
+            if *kind != ["bin"] {
+                continue;
+            }
+            let dst = out_dir.join(name);
+            fs::write(&dst, []).unwrap_or_else(|err| panic!("failed to create {dst:?}: {err}"));
+        }
+    }
+}

+ 74 - 0
user/apps/syscall_ebpf/syscall_ebpf/src/main.rs

@@ -0,0 +1,74 @@
+use aya::{maps::HashMap, programs::KProbe};
+#[rustfmt::skip]
+use log::{debug, warn};
+use tokio::{signal, task::yield_now, time};
+
+extern crate libc;
+
+#[tokio::main(flavor = "current_thread")]
+async fn main() -> anyhow::Result<()> {
+    // env_logger::init();
+    env_logger::builder()
+        .filter_level(log::LevelFilter::Warn)
+        .format_timestamp(None)
+        .init();
+
+    // Bump the memlock rlimit. This is needed for older kernels that don't use the
+    // new memcg based accounting, see https://lwn.net/Articles/837122/
+    let rlim = libc::rlimit {
+        rlim_cur: libc::RLIM_INFINITY,
+        rlim_max: libc::RLIM_INFINITY,
+    };
+    let ret = unsafe { libc::setrlimit(libc::RLIMIT_MEMLOCK, &rlim) };
+    if ret != 0 {
+        debug!("remove limit on locked memory failed, ret is: {}", ret);
+    }
+
+    // This will include your eBPF object file as raw bytes at compile-time and load it at
+    // runtime. This approach is recommended for most real-world use cases. If you would
+    // like to specify the eBPF program at runtime rather than at compile-time, you can
+    // reach for `Bpf::load_file` instead.
+    let mut ebpf = aya::Ebpf::load(aya::include_bytes_aligned!(concat!(
+        env!("OUT_DIR"),
+        "/syscall_ebpf"
+    )))?;
+    if let Err(e) = aya_log::EbpfLogger::init(&mut ebpf) {
+        // This can happen if you remove all log statements from your eBPF program.
+        warn!("failed to initialize eBPF logger: {}", e);
+    }
+
+    let program: &mut KProbe = ebpf.program_mut("syscall_ebpf").unwrap().try_into()?;
+    program.load()?;
+    program.attach("dragonos_kernel::syscall::Syscall::handle", 0)?;
+    // println!("attach the kprobe to dragonos_kernel::syscall::Syscall::handle");
+
+    // print the value of the blocklist per 5 seconds
+    tokio::spawn(async move {
+        let blocklist: HashMap<_, u32, u32> =
+            HashMap::try_from(ebpf.map("SYSCALL_LIST").unwrap()).unwrap();
+        let mut now = time::Instant::now();
+        loop {
+            let new_now = time::Instant::now();
+            let duration = new_now.duration_since(now);
+            if duration.as_secs() >= 5 {
+                println!("------------SYSCALL_LIST----------------");
+                let iter = blocklist.iter();
+                for item in iter {
+                    if let Ok((key, value)) = item {
+                        println!("syscall: {:?}, count: {:?}", key, value);
+                    }
+                }
+                println!("----------------------------------------");
+                now = new_now;
+            }
+            yield_now().await;
+        }
+    });
+
+    let ctrl_c = signal::ctrl_c();
+    println!("Waiting for Ctrl-C...");
+    ctrl_c.await?;
+    println!("Exiting...");
+
+    Ok(())
+}

+ 1 - 0
user/apps/test-sigprocmask/.gitignore

@@ -0,0 +1 @@
+test-sigprocmask

+ 20 - 0
user/apps/test-sigprocmask/Makefile

@@ -0,0 +1,20 @@
+ifeq ($(ARCH), x86_64)
+	CROSS_COMPILE=x86_64-linux-musl-
+else ifeq ($(ARCH), riscv64)
+	CROSS_COMPILE=riscv64-linux-musl-
+endif
+
+CC=$(CROSS_COMPILE)gcc
+
+.PHONY: all
+all: main.c
+	$(CC) -static -o test-sigprocmask main.c
+
+.PHONY: install clean
+install: all
+	mv test-sigprocmask $(DADK_CURRENT_BUILD_DIR)/test-sigprocmask
+
+clean:
+	rm test-sigprocmask *.o
+
+fmt:

+ 132 - 0
user/apps/test-sigprocmask/main.c

@@ -0,0 +1,132 @@
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#define TEST_ASSERT(left, right, success_msg, fail_msg)                        \
+    do {                                                                       \
+        if ((left) == (right)) {                                               \
+            printf("[PASS] %s\n", success_msg);                                \
+        } else {                                                               \
+            printf("[FAIL] %s: Expected 0x%lx, but got 0x%lx\n",               \
+                   fail_msg,                                                   \
+                   (unsigned long)(right),                                     \
+                   (unsigned long)(left));                                     \
+        }                                                                      \
+    } while (0)
+
+
+static int signal_received = 0;
+
+void signal_handler(int signo) {
+    if (signo == SIGINT) {
+        printf("\nReceived SIGINT (Ctrl+C)\n");
+        signal_received = 1;
+    }
+}
+
+void print_signal_mask(const char *msg, const sigset_t *mask) {
+    printf("%s: ", msg);
+    for (int signo = 1; signo < NSIG; ++signo) {
+        if (sigismember(mask, signo)) {
+            printf("%d ", signo);
+        }
+    }
+    printf("\n");
+}
+
+// 获取当前屏蔽字的函数
+unsigned long get_signal_mask() {
+    sigset_t sigset;
+    if (sigprocmask(SIG_BLOCK, NULL, &sigset) == -1) {
+        perror("sigprocmask");
+        return -1; // 返回错误标记
+    }
+
+    // 将信号集编码为位掩码
+    unsigned long mask = 0;
+    for (int i = 1; i < NSIG; i++) {
+        if (sigismember(&sigset, i)) {
+            mask |= 1UL << (i - 1);
+        }
+    }
+    return mask;
+}
+
+int main() {
+    sigset_t new_mask, old_mask;
+    sigemptyset(&old_mask);
+
+    // 注册 SIGINT 的信号处理函数
+    if (signal(SIGINT, signal_handler) == SIG_ERR) {
+        perror("signal");
+        exit(EXIT_FAILURE);
+    }
+    printf("Signal handler for SIGINT is registered.\n");
+    signal_received = 0;
+    kill(getpid(), SIGINT);
+    sleep(5);
+
+    TEST_ASSERT(signal_received, 1, "SIGINT was received", "SIGINT was not received");
+    signal_received = 0;
+
+    // 初始化新的信号集,并将 SIGINT 添加到其中
+    sigemptyset(&new_mask);
+    sigaddset(&new_mask, SIGINT);
+
+    // 打印 new_mask 的值
+    print_signal_mask("new_mask", &new_mask);
+
+    // 屏蔽 SIGINT
+    if (sigprocmask(SIG_BLOCK, &new_mask, &old_mask) < 0) {
+        perror("sigprocmask - SIG_BLOCK");
+        exit(EXIT_FAILURE);
+    }
+
+    // 打印 old_mask 的值
+    print_signal_mask("old_mask", &old_mask);
+
+    // 检查 SIGINT 是否被屏蔽
+    unsigned long actual_mask = get_signal_mask();
+    unsigned long expected_mask = (1UL << (SIGINT - 1));
+    TEST_ASSERT(actual_mask,
+                expected_mask,
+                "Signal mask is as expected",
+                "Signal mask mismatch");
+
+    printf("SIGINT is now blocked.\n");
+    signal_received = 0;
+    // 向当前进程发送 SIGINT
+    kill(getpid(), SIGINT);
+
+    // 等待 5 秒,以便测试 SIGINT 是否被屏蔽
+    sleep(5);
+    TEST_ASSERT(signal_received, 0, "SIGINT was blocked", "SIGINT was not blocked");
+    signal_received = 0;
+    // 恢复原来的信号屏蔽字
+    if (sigprocmask(SIG_SETMASK, &old_mask, &old_mask) < 0) {
+        perror("sigprocmask - SIG_SETMASK");
+        exit(EXIT_FAILURE);
+    }
+    print_signal_mask("old_mask returned", &old_mask);
+
+    // 检查 SIGINT 是否被解除屏蔽
+    actual_mask = get_signal_mask();
+    expected_mask = 0;
+    TEST_ASSERT(actual_mask,
+                expected_mask,
+                "Signal mask is as expected",
+                "Signal mask mismatch");
+
+    printf("SIGINT is now unblocked.\n");
+
+    signal_received = 0;
+    kill(getpid(), SIGINT);
+
+    // 等待 5 秒,以便测试 SIGINT 是否解除屏蔽
+    sleep(5);
+    TEST_ASSERT(signal_received, 1, "SIGINT was received", "SIGINT was not received");
+
+    printf("Exiting program.\n");
+    return 0;
+}

+ 0 - 3
user/apps/test_ebpf/.gitignore

@@ -1,3 +0,0 @@
-/target
-Cargo.lock
-/install/

+ 0 - 16
user/apps/test_ebpf/Cargo.toml

@@ -1,16 +0,0 @@
-[package]
-name = "test_ebpf"
-version = "0.1.0"
-edition = "2021"
-
-[dependencies]
-aya = { git = "https://git.mirrors.dragonos.org.cn/DragonOS-Community/tiny-aya.git", rev = "0689f13" }
-aya-log = { git = "https://git.mirrors.dragonos.org.cn/DragonOS-Community/tiny-aya.git", rev = "0689f13" }
-
-log = "0.4.22"
-env_logger = "0.11.5"
-tokio = { version = "1.25", features = ["macros", "rt", "rt-multi-thread", "net", "signal", "time"] }
-
-[profile.release]
-lto = true
-strip = true

+ 0 - 60
user/apps/test_ebpf/src/main.rs

@@ -1,60 +0,0 @@
-use aya::maps::HashMap;
-use aya::programs::KProbe;
-use aya::{include_bytes_aligned, Ebpf};
-use aya_log::EbpfLogger;
-use log::{info, warn};
-use std::error::Error;
-use tokio::task::yield_now;
-use tokio::{signal, time};
-
-#[tokio::main(flavor = "current_thread")]
-async fn main() -> Result<(), Box<dyn Error>> {
-    env_logger::builder()
-        .filter_level(log::LevelFilter::Warn)
-        .format_timestamp(None)
-        .init();
-
-    let mut bpf = Ebpf::load(include_bytes_aligned!(
-        "../syscall_ebpf/target/bpfel-unknown-none/release/syscall_ebpf"
-    ))?;
-
-    // create a async task to read the log
-    if let Err(e) = EbpfLogger::init(&mut bpf) {
-        // This can happen if you remove all log statements from your eBPF program.
-        warn!("failed to initialize eBPF logger: {}", e);
-    }
-
-    let program: &mut KProbe = bpf.program_mut("syscall_ebpf").unwrap().try_into()?;
-    program.load()?;
-    program.attach("dragonos_kernel::syscall::Syscall::handle", 0)?;
-
-    info!("attacch the kprobe to dragonos_kernel::syscall::Syscall::handle");
-
-    // print the value of the blocklist per 5 seconds
-    tokio::spawn(async move {
-        let blocklist: HashMap<_, u32, u32> =
-            HashMap::try_from(bpf.map("SYSCALL_LIST").unwrap()).unwrap();
-        let mut now = time::Instant::now();
-        loop {
-            let new_now = time::Instant::now();
-            let duration = new_now.duration_since(now);
-            if duration.as_secs() >= 5 {
-                println!("------------SYSCALL_LIST----------------");
-                let iter = blocklist.iter();
-                for item in iter {
-                    if let Ok((key, value)) = item {
-                        println!("syscall: {:?}, count: {:?}", key, value);
-                    }
-                }
-                println!("----------------------------------------");
-                now = new_now;
-            }
-            yield_now().await;
-        }
-    });
-
-    info!("Waiting for Ctrl-C...");
-    signal::ctrl_c().await?;
-    info!("Exiting...");
-    Ok(())
-}

+ 0 - 2
user/apps/test_ebpf/syscall_ebpf/.cargo/config.toml

@@ -1,2 +0,0 @@
-[alias]
-xtask = "run --package xtask --"

+ 0 - 3
user/apps/test_ebpf/syscall_ebpf/.vscode/settings.json

@@ -1,3 +0,0 @@
-{
-  "rust-analyzer.linkedProjects": ["Cargo.toml", "syscall_ebpf-ebpf/Cargo.toml"]
-}

+ 0 - 3
user/apps/test_ebpf/syscall_ebpf/Cargo.toml

@@ -1,3 +0,0 @@
-[workspace]
-resolver = "2"
-members = ["xtask", "syscall_ebpf-common"]

+ 0 - 32
user/apps/test_ebpf/syscall_ebpf/README.md

@@ -1,32 +0,0 @@
-# syscall_ebpf
-
-## Prerequisites
-
-1. Install bpf-linker: `cargo install bpf-linker`
-
-## Build eBPF
-
-```bash
-cargo xtask build-ebpf
-```
-
-To perform a release build you can use the `--release` flag.
-You may also change the target architecture with the `--target` flag.
-
-## Build Userspace
-
-```bash
-cargo build
-```
-
-## Build eBPF and Userspace
-
-```bash
-cargo xtask build
-```
-
-## Run
-
-```bash
-RUST_LOG=info cargo xtask run
-```

+ 0 - 6
user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.cargo/config.toml

@@ -1,6 +0,0 @@
-[build]
-target-dir = "../target"
-target = "bpfel-unknown-none"
-
-[unstable]
-build-std = ["core"]

+ 0 - 2
user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.helix/config.toml

@@ -1,2 +0,0 @@
-[editor]
-workspace-lsp-roots = []

+ 0 - 4
user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.vim/coc-settings.json

@@ -1,4 +0,0 @@
-{
-    "rust-analyzer.cargo.target": "bpfel-unknown-none",
-    "rust-analyzer.checkOnSave.allTargets": false
-}

+ 0 - 4
user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.vscode/settings.json

@@ -1,4 +0,0 @@
-{
-    "rust-analyzer.cargo.target": "bpfel-unknown-none",
-    "rust-analyzer.checkOnSave.allTargets": false
-}

+ 0 - 33
user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/Cargo.toml

@@ -1,33 +0,0 @@
-[package]
-name = "syscall_ebpf-ebpf"
-version = "0.1.0"
-edition = "2021"
-
-[dependencies]
-aya-ebpf = { git = "https://git.mirrors.dragonos.org.cn/DragonOS-Community/aya.git", rev = "3d57d35" }
-aya-log-ebpf = { git = "https://git.mirrors.dragonos.org.cn/DragonOS-Community/aya.git", rev = "3d57d35" }
-
-syscall_ebpf-common = { path = "../syscall_ebpf-common" }
-
-[[bin]]
-name = "syscall_ebpf"
-path = "src/main.rs"
-
-[profile.dev]
-opt-level = 3
-debug = false
-debug-assertions = false
-overflow-checks = false
-lto = true
-panic = "abort"
-incremental = false
-codegen-units = 1
-rpath = false
-
-[profile.release]
-lto = true
-panic = "abort"
-codegen-units = 1
-
-[workspace]
-members = []

+ 0 - 13
user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/rust-toolchain.toml

@@ -1,13 +0,0 @@
-[toolchain]
-channel = "nightly-2024-11-05"
-# The source code of rustc, provided by the rust-src component, is needed for
-# building eBPF programs.
-components = [
-    "cargo",
-    "clippy",
-    "rust-docs",
-    "rust-src",
-    "rust-std",
-    "rustc",
-    "rustfmt",
-]

Some files were not shown because too many files changed in this diff