浏览代码

添加thread和futex机制 (#411)

* 初步实现clone系统调用

* 实现了线程,初步实现futex机制,添加了几个小的系统调用

* 更改pcb引用计数问题

* 解决死锁bug

---------

Co-authored-by: LoGin <longjin@DragonOS.org>
GnoCiYeH 1 年之前
父节点
当前提交
971462be94

+ 44 - 19
kernel/src/arch/x86_64/process/mod.rs

@@ -5,7 +5,11 @@ use core::{
     sync::atomic::{compiler_fence, Ordering},
 };
 
-use alloc::{string::String, sync::Arc, vec::Vec};
+use alloc::{
+    string::String,
+    sync::{Arc, Weak},
+    vec::Vec,
+};
 
 use memoffset::offset_of;
 use x86::{controlregs::Cr4, segmentation::SegmentSelector};
@@ -20,14 +24,16 @@ use crate::{
         VirtAddr,
     },
     process::{
-        fork::CloneFlags, KernelStack, ProcessControlBlock, ProcessFlags, ProcessManager,
-        SwitchResult, SWITCH_RESULT,
+        fork::{CloneFlags, KernelCloneArgs},
+        KernelStack, ProcessControlBlock, ProcessFlags, ProcessManager, SwitchResult,
+        SWITCH_RESULT,
     },
     syscall::{Syscall, SystemError},
 };
 
 use self::{
     kthread::kernel_thread_bootstrap_stage1,
+    syscall::ARCH_SET_FS,
     table::{switch_fs_and_gs, KERNEL_DS, USER_DS},
 };
 
@@ -38,6 +44,9 @@ pub mod kthread;
 pub mod syscall;
 pub mod table;
 
+pub const IA32_FS_BASE: u32 = 0xC000_0100;
+pub const IA32_GS_BASE: u32 = 0xC000_0101;
+
 extern "C" {
     /// 从中断返回
     fn ret_from_intr();
@@ -175,7 +184,7 @@ impl ArchPCBInfo {
         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
             self.fsbase = x86::current::segmentation::rdfsbase() as usize;
         } else {
-            self.fsbase = 0;
+            self.fsbase = x86::msr::rdmsr(IA32_FS_BASE) as usize;
         }
     }
 
@@ -183,19 +192,23 @@ impl ArchPCBInfo {
         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
             self.gsbase = x86::current::segmentation::rdgsbase() as usize;
         } else {
-            self.gsbase = 0;
+            self.gsbase = x86::msr::rdmsr(IA32_GS_BASE) as usize;
         }
     }
 
     pub unsafe fn restore_fsbase(&mut self) {
         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
             x86::current::segmentation::wrfsbase(self.fsbase as u64);
+        } else {
+            x86::msr::wrmsr(IA32_FS_BASE, self.fsbase as u64);
         }
     }
 
     pub unsafe fn restore_gsbase(&mut self) {
         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
             x86::current::segmentation::wrgsbase(self.gsbase as u64);
+        } else {
+            x86::msr::wrmsr(IA32_GS_BASE, self.gsbase as u64);
         }
     }
 
@@ -228,11 +241,11 @@ impl ProcessControlBlock {
             panic!("current_pcb is null");
         }
         unsafe {
-            // 为了防止内核栈的pcb指针被释放,这里需要将其包装一下,使得Arc的drop不会被调用
-            let arc_wrapper: ManuallyDrop<Arc<ProcessControlBlock>> =
-                ManuallyDrop::new(Arc::from_raw(*p));
+            // 为了防止内核栈的pcb weak 指针被释放,这里需要将其包装一下
+            let weak_wrapper: ManuallyDrop<Weak<ProcessControlBlock>> =
+                ManuallyDrop::new(Weak::from_raw(*p));
 
-            let new_arc: Arc<ProcessControlBlock> = Arc::clone(&arc_wrapper);
+            let new_arc: Arc<ProcessControlBlock> = weak_wrapper.upgrade().unwrap();
             return new_arc;
         }
     }
@@ -255,11 +268,12 @@ impl ProcessManager {
     ///
     /// 由于这个过程与具体的架构相关,所以放在这里
     pub fn copy_thread(
-        _clone_flags: &CloneFlags,
         current_pcb: &Arc<ProcessControlBlock>,
         new_pcb: &Arc<ProcessControlBlock>,
+        clone_args: KernelCloneArgs,
         current_trapframe: &TrapFrame,
     ) -> Result<(), SystemError> {
+        let clone_flags = clone_args.flags;
         let mut child_trapframe = current_trapframe.clone();
 
         // 子进程的返回值为0
@@ -278,6 +292,10 @@ impl ProcessManager {
 
         // 拷贝栈帧
         unsafe {
+            let usp = clone_args.stack;
+            if usp != 0 {
+                child_trapframe.rsp = usp as u64;
+            }
             let trap_frame_ptr = trap_frame_vaddr.data() as *mut TrapFrame;
             *trap_frame_ptr = child_trapframe;
         }
@@ -296,15 +314,19 @@ impl ProcessManager {
         drop(current_arch_guard);
 
         // 设置返回地址(子进程开始执行的指令地址)
-
         if new_pcb.flags().contains(ProcessFlags::KTHREAD) {
             let kthread_bootstrap_stage1_func_addr = kernel_thread_bootstrap_stage1 as usize;
-
             new_arch_guard.rip = kthread_bootstrap_stage1_func_addr;
         } else {
             new_arch_guard.rip = ret_from_intr as usize;
         }
 
+        // 设置tls
+        if clone_flags.contains(CloneFlags::CLONE_SETTLS) {
+            drop(new_arch_guard);
+            Syscall::do_arch_prctl_64(new_pcb, ARCH_SET_FS, clone_args.tls, true)?;
+        }
+
         return Ok(());
     }
 
@@ -335,28 +357,28 @@ impl ProcessManager {
         compiler_fence(Ordering::SeqCst);
 
         next_addr_space.read().user_mapper.utable.make_current();
+        drop(next_addr_space);
         compiler_fence(Ordering::SeqCst);
         // 切换内核栈
 
         // 获取arch info的锁,并强制泄露其守卫(切换上下文后,在switch_finish_hook中会释放锁)
-        let next_arch = SpinLockGuard::leak(next.arch_info());
-        let prev_arch = SpinLockGuard::leak(prev.arch_info());
+        let next_arch = SpinLockGuard::leak(next.arch_info()) as *mut ArchPCBInfo;
+        let prev_arch = SpinLockGuard::leak(prev.arch_info()) as *mut ArchPCBInfo;
 
-        prev_arch.rip = switch_back as usize;
+        (*prev_arch).rip = switch_back as usize;
 
         // 恢复当前的 preempt count*2
         ProcessManager::current_pcb().preempt_enable();
         ProcessManager::current_pcb().preempt_enable();
-        SWITCH_RESULT.as_mut().unwrap().get_mut().prev_pcb = Some(prev.clone());
-        SWITCH_RESULT.as_mut().unwrap().get_mut().next_pcb = Some(next.clone());
 
         // 切换tss
         TSSManager::current_tss().set_rsp(
             x86::Ring::Ring0,
             next.kernel_stack().stack_max_address().data() as u64,
         );
+        SWITCH_RESULT.as_mut().unwrap().get_mut().prev_pcb = Some(prev);
+        SWITCH_RESULT.as_mut().unwrap().get_mut().next_pcb = Some(next);
         // kdebug!("switch tss ok");
-
         compiler_fence(Ordering::SeqCst);
         // 正式切换上下文
         switch_to_inner(prev_arch, next_arch);
@@ -365,7 +387,7 @@ impl ProcessManager {
 
 /// 保存上下文,然后切换进程,接着jmp到`switch_finish_hook`钩子函数
 #[naked]
-unsafe extern "sysv64" fn switch_to_inner(prev: &mut ArchPCBInfo, next: &mut ArchPCBInfo) {
+unsafe extern "sysv64" fn switch_to_inner(prev: *mut ArchPCBInfo, next: *mut ArchPCBInfo) {
     asm!(
         // As a quick reminder for those who are unfamiliar with the System V ABI (extern "C"):
         //
@@ -394,6 +416,9 @@ unsafe extern "sysv64" fn switch_to_inner(prev: &mut ArchPCBInfo, next: &mut Arc
         mov [rdi + {off_fs}], fs
         mov [rdi + {off_gs}], gs
 
+        // mov fs, [rsi + {off_fs}]
+        // mov gs, [rsi + {off_gs}]
+
         push rbp
         push rax
 

+ 71 - 3
kernel/src/arch/x86_64/process/syscall.rs

@@ -1,4 +1,4 @@
-use alloc::{string::String, vec::Vec};
+use alloc::{string::String, sync::Arc, vec::Vec};
 
 use crate::{
     arch::{
@@ -10,9 +10,9 @@ use crate::{
     mm::ucontext::AddressSpace,
     process::{
         exec::{load_binary_file, ExecParam, ExecParamFlags},
-        ProcessManager,
+        ProcessControlBlock, ProcessManager,
     },
-    syscall::{Syscall, SystemError},
+    syscall::{user_access::UserBufferWriter, Syscall, SystemError},
 };
 
 impl Syscall {
@@ -105,6 +105,8 @@ impl Syscall {
         regs.rflags = 0x200;
         regs.rax = 1;
 
+        drop(param);
+
         // kdebug!("regs: {:?}\n", regs);
 
         // kdebug!(
@@ -114,4 +116,70 @@ impl Syscall {
 
         return Ok(());
     }
+
+    /// ## 用于控制和查询与体系结构相关的进程特定选项
+    pub fn arch_prctl(option: usize, arg2: usize) -> Result<usize, SystemError> {
+        let pcb = ProcessManager::current_pcb();
+        if let Err(SystemError::EINVAL) = Self::do_arch_prctl_64(&pcb, option, arg2, true) {
+            Self::do_arch_prctl_common(option, arg2)?;
+        }
+        Ok(0)
+    }
+
+    /// ## 64位下控制fs/gs base寄存器的方法
+    pub fn do_arch_prctl_64(
+        pcb: &Arc<ProcessControlBlock>,
+        option: usize,
+        arg2: usize,
+        from_user: bool,
+    ) -> Result<usize, SystemError> {
+        let mut arch_info = pcb.arch_info_irqsave();
+        match option {
+            ARCH_GET_FS => {
+                unsafe { arch_info.save_fsbase() };
+                let mut writer = UserBufferWriter::new(
+                    arg2 as *mut usize,
+                    core::mem::size_of::<usize>(),
+                    from_user,
+                )?;
+                writer.copy_one_to_user(&arch_info.fsbase, 0)?;
+            }
+            ARCH_GET_GS => {
+                unsafe { arch_info.save_gsbase() };
+                let mut writer = UserBufferWriter::new(
+                    arg2 as *mut usize,
+                    core::mem::size_of::<usize>(),
+                    from_user,
+                )?;
+                writer.copy_one_to_user(&arch_info.gsbase, 0)?;
+            }
+            ARCH_SET_FS => {
+                arch_info.fsbase = arg2;
+                // 如果是当前进程则直接写入寄存器
+                if pcb.pid() == ProcessManager::current_pcb().pid() {
+                    unsafe { arch_info.restore_fsbase() }
+                }
+            }
+            ARCH_SET_GS => {
+                arch_info.gsbase = arg2;
+                if pcb.pid() == ProcessManager::current_pcb().pid() {
+                    unsafe { arch_info.restore_gsbase() }
+                }
+            }
+            _ => {
+                return Err(SystemError::EINVAL);
+            }
+        }
+        Ok(0)
+    }
+
+    #[allow(dead_code)]
+    pub fn do_arch_prctl_common(_option: usize, _arg2: usize) -> Result<usize, SystemError> {
+        todo!("do_arch_prctl_common not unimplemented");
+    }
 }
+
+pub const ARCH_SET_GS: usize = 0x1001;
+pub const ARCH_SET_FS: usize = 0x1002;
+pub const ARCH_GET_FS: usize = 0x1003;
+pub const ARCH_GET_GS: usize = 0x1004;

+ 33 - 0
kernel/src/arch/x86_64/rand.rs

@@ -1,5 +1,38 @@
 use core::arch::x86_64::_rdtsc;
 
+use alloc::vec::Vec;
+
+use crate::{
+    libs::rand::GRandFlags,
+    syscall::{user_access::UserBufferWriter, Syscall, SystemError},
+};
+
 pub fn rand() -> usize {
     return unsafe { (_rdtsc() * _rdtsc() + 998244353_u64 * _rdtsc()) as usize };
 }
+
+impl Syscall {
+    /// ## 将随机字节填入buf
+    ///
+    /// ### 该系统调用与linux不一致,因为目前没有其他随机源
+    pub fn get_random(buf: *mut u8, len: usize, flags: GRandFlags) -> Result<usize, SystemError> {
+        if flags.bits() == (GRandFlags::GRND_INSECURE.bits() | GRandFlags::GRND_RANDOM.bits()) {
+            return Err(SystemError::EINVAL);
+        }
+
+        let mut writer = UserBufferWriter::new(buf, len, true)?;
+
+        let mut ret = Vec::new();
+        let mut count = 0;
+        while count < len {
+            let rand = rand();
+            for offset in 0..4 {
+                ret.push((rand >> offset * 2) as u8);
+                count += 1;
+            }
+        }
+
+        writer.copy_to_user(&ret, 0)?;
+        Ok(len)
+    }
+}

+ 23 - 4
kernel/src/driver/net/e1000e/e1000e.rs

@@ -16,7 +16,7 @@ use crate::driver::pci::pci::{
 };
 use crate::driver::pci::pci_irq::{IrqCommonMsg, IrqMsg, IrqSpecificMsg, PciInterrupt, IRQ};
 use crate::include::bindings::bindings::pt_regs;
-use crate::libs::volatile::{ReadOnly, Volatile, VolatileReadable, VolatileWritable, WriteOnly};
+use crate::libs::volatile::{ReadOnly, Volatile, WriteOnly};
 use crate::net::net_core::poll_ifaces_try_lock_onetime;
 use crate::{kdebug, kinfo};
 
@@ -45,8 +45,10 @@ const E1000E_DEVICE_ID: [u16; 14] = [
 // BAR0空间大小(128KB)
 const E1000E_BAR_REG_SIZE: u32 = 128 * 1024;
 // BAR0空间对齐(64bit)
+#[allow(dead_code)]
 const E1000E_BAR_REG_ALIGN: u8 = 64;
 // 单个寄存器大小(32bit, 4字节)
+#[allow(dead_code)]
 const E1000E_REG_SIZE: u8 = 4;
 
 // TxBuffer和RxBuffer的大小(DMA页)
@@ -112,11 +114,13 @@ impl E1000EBuffer {
         }
     }
 
+    #[allow(dead_code)]
     pub fn as_addr(&self) -> NonNull<u8> {
         assert!(self.length != 0);
         return self.buffer;
     }
 
+    #[allow(dead_code)]
     pub fn as_addr_u64(&self) -> u64 {
         assert!(self.length != 0);
         return self.buffer.as_ptr() as u64;
@@ -127,6 +131,7 @@ impl E1000EBuffer {
         return self.paddr;
     }
 
+    #[allow(dead_code)]
     pub fn as_slice(&self) -> &[u8] {
         assert!(self.length != 0);
         return unsafe { from_raw_parts(self.buffer.as_ptr(), self.length) };
@@ -154,10 +159,11 @@ impl E1000EBuffer {
 
 // 中断处理函数, 调用协议栈的poll函数,未来可能会用napi来替换这里
 // Interrupt handler
-unsafe extern "C" fn e1000e_irq_handler(irq_num: u64, irq_paramer: u64, regs: *mut pt_regs) {
+unsafe extern "C" fn e1000e_irq_handler(_irq_num: u64, _irq_paramer: u64, _regs: *mut pt_regs) {
     poll_ifaces_try_lock_onetime().ok();
 }
 
+#[allow(dead_code)]
 pub struct E1000EDevice {
     // 设备寄存器
     // device registers
@@ -194,6 +200,7 @@ pub struct E1000EDevice {
 impl E1000EDevice {
     // 从PCI标准设备进行驱动初始化
     // init the device for PCI standard device struct
+    #[allow(unused_assignments)]
     pub fn new(device: &mut PciDeviceStructureGeneralDevice) -> Result<Self, E1000EPciError> {
         // 从BAR0获取我们需要的寄存器
         // Build registers sturcts from BAR0
@@ -479,6 +486,7 @@ impl E1000EDevice {
     // 切换是否接受分组到达的中断
     // change whether the receive timer interrupt is enabled
     // Note: this method is not completely implemented and not used in the current version
+    #[allow(dead_code)]
     pub fn e1000e_intr_set(&mut self, state: bool) {
         let mut ims = unsafe { volread!(self.interrupt_regs, ims) };
         match state {
@@ -491,6 +499,7 @@ impl E1000EDevice {
     // 实现了一部分napi机制的收包函数, 现在还没有投入使用
     // This method is a partial implementation of napi (New API) techniques
     // Note: this method is not completely implemented and not used in the current version
+    #[allow(dead_code)]
     pub fn e1000e_receive2(&mut self) -> Option<E1000EBuffer> {
         // 向设备表明我们已经接受到了之前的中断
         // Tell e1000e we have received the interrupt
@@ -585,8 +594,8 @@ pub extern "C" fn rs_e1000e_init() {
 
 pub fn e1000e_init() -> () {
     match e1000e_probe() {
-        Ok(code) => kinfo!("Successfully init e1000e device!"),
-        Err(error) => kinfo!("Error occurred!"),
+        Ok(_code) => kinfo!("Successfully init e1000e device!"),
+        Err(_error) => kinfo!("Error occurred!"),
     }
 }
 
@@ -618,6 +627,7 @@ pub fn e1000e_probe() -> Result<u64, E1000EPciError> {
 // 用到的e1000e寄存器结构体
 // pp.275, Table 13-3
 // 设备通用寄存器
+#[allow(dead_code)]
 struct GeneralRegs {
     ctrl: Volatile<u32>,         //0x00000
     ctrl_alias: Volatile<u32>,   //0x00004
@@ -630,6 +640,7 @@ struct GeneralRegs {
     mdic: Volatile<u32>,         //0x00020
 }
 // 中断控制
+#[allow(dead_code)]
 struct InterruptRegs {
     icr: Volatile<u32>, //0x000c0 ICR寄存器应当为只读寄存器,但我们需要向其中写入来清除对应位
     itr: Volatile<u32>, //0x000c4
@@ -644,6 +655,7 @@ struct ReceiveCtrlRegs {
     rctl: Volatile<u32>, //0x00100
 }
 // 发包功能控制
+#[allow(dead_code)]
 struct TransmitCtrlRegs {
     tctl: Volatile<u32>,     //0x00400
     tctl_ext: Volatile<u32>, //0x00404
@@ -652,6 +664,7 @@ struct TransmitCtrlRegs {
     tipg: Volatile<u32>,     //0x00410
 }
 // 收包功能相关
+#[allow(dead_code)]
 struct ReceiveRegs {
     rdbal0: Volatile<u32>,     //0x02800
     rdbah0: Volatile<u32>,     //0x02804
@@ -666,6 +679,7 @@ struct ReceiveRegs {
     rxdctl: Volatile<u32>,     //0x2828
 }
 // 发包功能相关
+#[allow(dead_code)]
 struct TransimitRegs {
     tdbal0: Volatile<u32>,      //0x03800
     tdbah0: Volatile<u32>,      //0x03804
@@ -689,6 +703,7 @@ struct ReceiveAddressRegs {
 struct PCIeRegs {
     gcr: Volatile<u32>, //0x05b00
 }
+#[allow(dead_code)]
 struct StatisticsRegs {}
 
 // 0x05200-0x053fc
@@ -714,15 +729,19 @@ const E1000E_CTRL_SLU: u32 = 1 << 6;
 const E1000E_CTRL_FRCSPD: u32 = 1 << 11;
 const E1000E_CTRL_FRCDPLX: u32 = 1 << 12;
 const E1000E_CTRL_RST: u32 = 1 << 26;
+#[allow(dead_code)]
 const E1000E_CTRL_RFCE: u32 = 1 << 27;
+#[allow(dead_code)]
 const E1000E_CTRL_TFCE: u32 = 1 << 28;
 const E1000E_CTRL_PHY_RST: u32 = 1 << 31;
 
 // IMS
 const E1000E_IMS_LSC: u32 = 1 << 2;
 const E1000E_IMS_RXDMT0: u32 = 1 << 4;
+#[allow(dead_code)]
 const E1000E_IMS_RXO: u32 = 1 << 6;
 const E1000E_IMS_RXT0: u32 = 1 << 7;
+#[allow(dead_code)]
 const E1000E_IMS_RXQ0: u32 = 1 << 20;
 const E1000E_IMS_OTHER: u32 = 1 << 24; // qemu use this bit to set msi-x interrupt
 

+ 3 - 7
kernel/src/driver/net/e1000e/e1000e_driver.rs

@@ -3,16 +3,12 @@
 use crate::{
     driver::{
         base::{
-            device::{
-                bus::Bus,
-                driver::{Driver, DriverError},
-                Device, DevicePrivateData, IdTable,
-            },
+            device::{bus::Bus, driver::Driver, Device, IdTable},
             kobject::{KObjType, KObject, KObjectState},
         },
         net::NetDriver,
     },
-    kdebug, kinfo,
+    kinfo,
     libs::spinlock::SpinLock,
     net::{generate_iface_id, NET_DRIVERS},
     syscall::SystemError,
@@ -84,7 +80,7 @@ impl phy::RxToken for E1000ERxToken {
 }
 
 impl phy::TxToken for E1000ETxToken {
-    fn consume<R, F>(self, len: usize, f: F) -> R
+    fn consume<R, F>(self, _len: usize, f: F) -> R
     where
         F: FnOnce(&mut [u8]) -> R,
     {

+ 1 - 1
kernel/src/driver/net/virtio_net.rs

@@ -310,7 +310,7 @@ impl<T: Transport + 'static> NetDriver for VirtioInterface<T> {
         let mut guard = self.iface.lock();
         let poll_res = guard.poll(timestamp, self.driver.force_get_mut(), sockets);
         // todo: notify!!!
-        kdebug!("Virtio Interface poll:{poll_res}");
+        // kdebug!("Virtio Interface poll:{poll_res}");
         if poll_res {
             return Ok(());
         }

+ 24 - 1
kernel/src/filesystem/vfs/syscall.rs

@@ -151,7 +151,6 @@ impl Syscall {
     /// @return 文件描述符编号,或者是错误码
     pub fn open(path: &str, mode: FileMode) -> Result<usize, SystemError> {
         // kdebug!("open: path: {}, mode: {:?}", path, mode);
-
         // 文件名过长
         if path.len() > MAX_PATHLEN as usize {
             return Err(SystemError::ENAMETOOLONG);
@@ -727,6 +726,13 @@ impl Syscall {
         return Ok(kstat);
     }
 
+    fn do_stat(path: &str) -> Result<PosixKstat, SystemError> {
+        let fd = Self::open(path, FileMode::O_RDONLY)?;
+        let ret = Self::do_fstat(fd as i32);
+        Self::close(fd)?;
+        ret
+    }
+
     pub fn fstat(fd: i32, usr_kstat: *mut PosixKstat) -> Result<usize, SystemError> {
         let kstat = Self::do_fstat(fd)?;
         if usr_kstat.is_null() {
@@ -738,6 +744,14 @@ impl Syscall {
         return Ok(0);
     }
 
+    pub fn stat(path: &str, user_kstat: *mut PosixKstat) -> Result<usize, SystemError> {
+        let fd = Self::open(path, FileMode::O_RDONLY)?;
+        Self::fstat(fd as i32, user_kstat).map_err(|e| {
+            Self::close(fd).ok();
+            e
+        })
+    }
+
     pub fn mknod(
         path_ptr: *const i8,
         mode: ModeType,
@@ -771,6 +785,15 @@ impl Syscall {
 
         return Ok(0);
     }
+
+    pub fn writev(fd: i32, iov: usize, count: usize) -> Result<usize, SystemError> {
+        // IoVecs会进行用户态检验
+        let iovecs = unsafe { IoVecs::from_user(iov as *const IoVec, count, false) }?;
+
+        let data = iovecs.gather();
+
+        Self::write(fd, &data)
+    }
 }
 #[repr(C)]
 #[derive(Debug, Clone, Copy)]

+ 1 - 0
kernel/src/lib.rs

@@ -20,6 +20,7 @@
 #![feature(ptr_to_from_bits)]
 #![feature(concat_idents)]
 #![cfg_attr(target_os = "none", no_std)]
+#![feature(atomic_mut_ptr)]
 
 #[cfg(test)]
 #[macro_use]

+ 54 - 0
kernel/src/libs/futex/constant.rs

@@ -0,0 +1,54 @@
+bitflags! {
+    pub struct FutexArg: u32 {
+        const FUTEX_WAIT = 0;
+        const FUTEX_WAKE = 1;
+        const FUTEX_FD = 2;
+        const FUTEX_REQUEUE = 3;
+        const FUTEX_CMP_REQUEUE = 4;
+        const FUTEX_WAKE_OP = 5;
+        const FUTEX_LOCK_PI = 6;
+        const FUTEX_UNLOCK_PI = 7;
+        const FUTEX_TRYLOCK_PI = 8;
+        const FUTEX_WAIT_BITSET = 9;
+        const FUTEX_WAKE_BITSET = 10;
+        const FUTEX_WAIT_REQUEUE_PI = 11;
+        const FUTEX_CMP_REQUEUE_PI = 12;
+        const FUTEX_LOCK_PI2 = 13;
+    }
+
+    pub struct FutexFlag: u32 {
+        const FLAGS_MATCH_NONE = 0x01;
+        const FLAGS_SHARED = 0x01;
+        const FLAGS_CLOCKRT = 0x02;
+        const FLAGS_HAS_TIMEOUT = 0x04;
+        const FUTEX_PRIVATE_FLAG = 128;
+        const FUTEX_CLOCK_REALTIME = 256;
+        const FUTEX_CMD_MASK = !(Self::FUTEX_PRIVATE_FLAG.bits() | Self::FUTEX_CLOCK_REALTIME.bits());
+    }
+
+    pub struct FutexOP: u32 {
+        const FUTEX_OP_SET = 0;
+        const FUTEX_OP_ADD = 1;
+        const FUTEX_OP_OR = 2;
+        const FUTEX_OP_ANDN = 3;
+        const FUTEX_OP_XOR = 4;
+        const FUTEX_OP_OPARG_SHIFT = 8;
+    }
+
+    pub struct FutexOpCMP: u32 {
+        const FUTEX_OP_CMP_EQ = 0;
+        const FUTEX_OP_CMP_NE = 1;
+        const FUTEX_OP_CMP_LT = 2;
+        const FUTEX_OP_CMP_LE = 3;
+        const FUTEX_OP_CMP_GT = 4;
+        const FUTEX_OP_CMP_GE = 5;
+    }
+}
+
+#[allow(dead_code)]
+pub const FUTEX_WAITERS: u32 = 0x80000000;
+#[allow(dead_code)]
+pub const FUTEX_OWNER_DIED: u32 = 0x40000000;
+#[allow(dead_code)]
+pub const FUTEX_TID_MASK: u32 = 0x3fffffff;
+pub const FUTEX_BITSET_MATCH_ANY: u32 = 0xffffffff;

+ 647 - 0
kernel/src/libs/futex/futex.rs

@@ -0,0 +1,647 @@
+use alloc::{
+    collections::LinkedList,
+    sync::{Arc, Weak},
+};
+use core::hash::{Hash, Hasher};
+use core::{intrinsics::likely, sync::atomic::AtomicU64};
+use hashbrown::HashMap;
+
+use crate::{
+    arch::{sched::sched, CurrentIrqArch, MMArch},
+    exception::InterruptArch,
+    libs::spinlock::{SpinLock, SpinLockGuard},
+    mm::{ucontext::AddressSpace, MemoryManagementArch, VirtAddr},
+    process::{ProcessControlBlock, ProcessManager},
+    syscall::{user_access::UserBufferReader, SystemError},
+    time::{
+        timer::{next_n_us_timer_jiffies, Timer, WakeUpHelper},
+        TimeSpec,
+    },
+};
+
+use super::constant::*;
+
+static mut FUTEX_DATA: Option<FutexData> = None;
+
+pub struct FutexData {
+    data: SpinLock<HashMap<FutexKey, FutexHashBucket>>,
+}
+
+impl FutexData {
+    pub fn futex_map() -> SpinLockGuard<'static, HashMap<FutexKey, FutexHashBucket>> {
+        unsafe { FUTEX_DATA.as_ref().unwrap().data.lock() }
+    }
+
+    pub fn try_remove(key: &FutexKey) -> Option<FutexHashBucket> {
+        unsafe {
+            let mut guard = FUTEX_DATA.as_ref().unwrap().data.lock();
+            if let Some(futex) = guard.get(key) {
+                if futex.chain.is_empty() {
+                    return guard.remove(key);
+                }
+            }
+        }
+        None
+    }
+}
+
+pub struct Futex;
+
+// 对于同一个futex的进程或线程将会在这个bucket等待
+pub struct FutexHashBucket {
+    // 该futex维护的等待队列
+    chain: LinkedList<Arc<FutexObj>>,
+}
+
+impl FutexHashBucket {
+    /// ## 判断是否在bucket里
+    pub fn contains(&self, futex_q: &FutexObj) -> bool {
+        self.chain
+            .iter()
+            .filter(|x| futex_q.pcb.ptr_eq(&x.pcb) && x.key == futex_q.key)
+            .count()
+            != 0
+    }
+
+    /// 让futex_q在该bucket上挂起
+    ///
+    /// 进入该函数前,需要关中断
+    #[inline(always)]
+    pub fn sleep_no_sched(&mut self, futex_q: Arc<FutexObj>) -> Result<(), SystemError> {
+        assert!(CurrentIrqArch::is_irq_enabled() == false);
+        self.chain.push_back(futex_q);
+
+        ProcessManager::mark_sleep(true)?;
+
+        Ok(())
+    }
+
+    /// ## 唤醒队列中的最多nr_wake个进程
+    ///
+    /// return: 唤醒的进程数
+    #[inline(always)]
+    pub fn wake_up(
+        &mut self,
+        key: FutexKey,
+        bitset: Option<u32>,
+        nr_wake: u32,
+    ) -> Result<usize, SystemError> {
+        let mut count = 0;
+        let mut pop_count = 0;
+        while let Some(futex_q) = self.chain.pop_front() {
+            if futex_q.key == key {
+                // TODO: 考虑优先级继承的机制
+
+                if let Some(bitset) = bitset {
+                    if futex_q.bitset != bitset {
+                        self.chain.push_back(futex_q);
+                        continue;
+                    }
+                }
+
+                // 唤醒
+                if futex_q.pcb.upgrade().is_some() {
+                    self.remove(futex_q.clone());
+                    ProcessManager::wakeup(&futex_q.pcb.upgrade().unwrap())?;
+                }
+
+                // 判断唤醒数
+                count += 1;
+                if count >= nr_wake {
+                    break;
+                }
+            } else {
+                self.chain.push_back(futex_q);
+            }
+            // 判断是否循环完队列了
+            pop_count += 1;
+            if pop_count >= self.chain.len() {
+                break;
+            }
+        }
+        Ok(count as usize)
+    }
+
+    /// 将FutexObj从bucket中删除
+    pub fn remove(&mut self, futex: Arc<FutexObj>) {
+        self.chain.drain_filter(|x| Arc::ptr_eq(x, &futex));
+    }
+}
+
+#[derive(Debug)]
+pub struct FutexObj {
+    pcb: Weak<ProcessControlBlock>,
+    key: FutexKey,
+    bitset: u32,
+    // TODO: 优先级继承
+}
+
+pub enum FutexAccess {
+    FutexRead,
+    FutexWrite,
+}
+
+#[allow(dead_code)]
+#[derive(Hash, PartialEq, Eq, Clone, Debug)]
+/// ### 用于定位内核唯一的futex
+pub enum InnerFutexKey {
+    Shared(SharedKey),
+    Private(PrivateKey),
+}
+
+#[derive(Hash, PartialEq, Eq, Clone, Debug)]
+pub struct FutexKey {
+    ptr: u64,
+    word: u64,
+    offset: u32,
+    key: InnerFutexKey,
+}
+
+/// 不同进程间通过文件共享futex变量,表明该变量在文件中的位置
+#[derive(Hash, PartialEq, Eq, Clone, Debug)]
+pub struct SharedKey {
+    i_seq: u64,
+    page_offset: u64,
+}
+
+/// 同一进程的不同线程共享futex变量,表明该变量在进程地址空间中的位置
+#[derive(Clone, Debug)]
+pub struct PrivateKey {
+    // 所在的地址空间
+    address_space: Option<Weak<AddressSpace>>,
+    // 表示所在页面的初始地址
+    address: u64,
+}
+
+impl Hash for PrivateKey {
+    fn hash<H: ~const Hasher>(&self, state: &mut H) {
+        self.address.hash(state);
+    }
+}
+
+impl Eq for PrivateKey {}
+
+impl PartialEq for PrivateKey {
+    fn eq(&self, other: &Self) -> bool {
+        if self.address_space.is_none() && other.address_space.is_none() {
+            return self.address == other.address;
+        } else {
+            return self
+                .address_space
+                .as_ref()
+                .unwrap_or(&Weak::default())
+                .ptr_eq(&other.address_space.as_ref().unwrap_or(&Weak::default()))
+                && self.address == other.address;
+        }
+    }
+}
+
+impl Futex {
+    /// ### 初始化FUTEX_DATA
+    pub fn init() {
+        unsafe {
+            FUTEX_DATA = Some(FutexData {
+                data: SpinLock::new(HashMap::new()),
+            })
+        };
+    }
+
+    /// ### 让当前进程在指定futex上等待直到futex_wake显式唤醒
+    pub fn futex_wait(
+        uaddr: VirtAddr,
+        flags: FutexFlag,
+        val: u32,
+        abs_time: Option<TimeSpec>,
+        bitset: u32,
+    ) -> Result<usize, SystemError> {
+        if bitset == 0 {
+            return Err(SystemError::EINVAL);
+        }
+
+        // 获取全局hash表的key值
+        let key = Self::get_futex_key(
+            uaddr,
+            flags.contains(FutexFlag::FLAGS_SHARED),
+            FutexAccess::FutexRead,
+        )?;
+
+        let mut futex_map_guard = FutexData::futex_map();
+        let bucket = futex_map_guard.get_mut(&key);
+        let bucket_mut = match bucket {
+            Some(bucket) => bucket,
+            None => {
+                let bucket = FutexHashBucket {
+                    chain: LinkedList::new(),
+                };
+                futex_map_guard.insert(key.clone(), bucket);
+                futex_map_guard.get_mut(&key).unwrap()
+            }
+        };
+
+        // 使用UserBuffer读取futex
+        let user_reader =
+            UserBufferReader::new(uaddr.as_ptr::<u32>(), core::mem::size_of::<u32>(), true)?;
+
+        // 从用户空间读取到futex的val
+        let mut uval = 0;
+
+        // 读取
+        // 这里只尝试一种方式去读取用户空间,与linux不太一致
+        // 对于linux,如果bucket被锁住时读取失败,将会将bucket解锁后重新读取
+        user_reader.copy_one_from_user::<u32>(&mut uval, 0)?;
+
+        // 不满足wait条件,返回错误
+        if uval != val {
+            return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
+        }
+
+        let pcb = ProcessManager::current_pcb();
+        // 创建超时计时器任务
+        let mut timer = None;
+        if !abs_time.is_none() {
+            let time = abs_time.unwrap();
+
+            let wakeup_helper = WakeUpHelper::new(pcb.clone());
+
+            let sec = time.tv_sec;
+            let nsec = time.tv_nsec;
+            let jiffies = next_n_us_timer_jiffies((nsec / 1000 + sec * 1_000_000) as u64);
+
+            let wake_up = Timer::new(wakeup_helper, jiffies);
+
+            wake_up.activate();
+            timer = Some(wake_up);
+        }
+
+        let futex_q = Arc::new(FutexObj {
+            pcb: Arc::downgrade(&pcb),
+            key: key.clone(),
+            bitset,
+        });
+        let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
+        // 满足条件则将当前进程在该bucket上挂起
+        bucket_mut.sleep_no_sched(futex_q.clone()).map_err(|e| {
+            kwarn!("error:{e:?}");
+            e
+        })?;
+        drop(bucket_mut);
+        drop(futex_map_guard);
+        drop(irq_guard);
+        sched();
+
+        // 被唤醒后的检查
+        let mut futex_map_guard = FutexData::futex_map();
+        let bucket = futex_map_guard.get_mut(&key);
+        let bucket_mut = match bucket {
+            // 如果该pcb不在链表里面了或者该链表已经被释放,就证明是正常的Wake操作
+            Some(bucket_mut) => {
+                if !bucket_mut.contains(&futex_q) {
+                    // 取消定时器任务
+                    if timer.is_some() {
+                        timer.unwrap().cancel();
+                    }
+                    return Ok(0);
+                }
+                // 非正常唤醒,返回交给下层
+                bucket_mut
+            }
+            None => {
+                // 取消定时器任务
+                if timer.is_some() {
+                    timer.unwrap().cancel();
+                }
+                return Ok(0);
+            }
+        };
+
+        // 如果是超时唤醒,则返回错误
+        if timer.is_some() {
+            if timer.clone().unwrap().timeout() {
+                bucket_mut.remove(futex_q);
+
+                return Err(SystemError::ETIMEDOUT);
+            }
+        }
+
+        // TODO: 如果没有挂起的信号,则重新判断是否满足wait要求,重新进入wait
+
+        // 经过前面的几个判断,到这里之后,
+        // 当前进程被唤醒大概率是其他进程更改了uval,需要重新去判断当前进程是否满足wait
+
+        // 到这里之后,前面的唤醒条件都不满足,则是被信号唤醒
+        // 需要处理信号然后重启futex系统调用
+
+        // 取消定时器任务
+        if timer.is_some() {
+            let timer = timer.unwrap();
+            if !timer.timeout() {
+                timer.cancel();
+            }
+        }
+        Ok(0)
+    }
+
+    // ### 唤醒指定futex上挂起的最多nr_wake个进程
+    pub fn futex_wake(
+        uaddr: VirtAddr,
+        flags: FutexFlag,
+        nr_wake: u32,
+        bitset: u32,
+    ) -> Result<usize, SystemError> {
+        if bitset == 0 {
+            return Err(SystemError::EINVAL);
+        }
+
+        // 获取futex_key,并且判断地址空间合法性
+        let key = Self::get_futex_key(
+            uaddr,
+            flags.contains(FutexFlag::FLAGS_SHARED),
+            FutexAccess::FutexRead,
+        )?;
+        let mut binding = FutexData::futex_map();
+        let bucket_mut = binding.get_mut(&key).ok_or(SystemError::EINVAL)?;
+
+        // 确保后面的唤醒操作是有意义的
+        if bucket_mut.chain.is_empty() {
+            return Ok(0);
+        }
+        // 从队列中唤醒
+        let count = bucket_mut.wake_up(key.clone(), Some(bitset), nr_wake)?;
+
+        drop(bucket_mut);
+        drop(binding);
+
+        FutexData::try_remove(&key);
+
+        Ok(count)
+    }
+
+    /// ### 唤醒制定uaddr1上的最多nr_wake个进程,然后将uaddr1最多nr_requeue个进程移动到uaddr2绑定的futex上
+    pub fn futex_requeue(
+        uaddr1: VirtAddr,
+        flags: FutexFlag,
+        uaddr2: VirtAddr,
+        nr_wake: i32,
+        nr_requeue: i32,
+        cmpval: Option<u32>,
+        requeue_pi: bool,
+    ) -> Result<usize, SystemError> {
+        if nr_requeue < 0 || nr_wake < 0 {
+            return Err(SystemError::EINVAL);
+        }
+
+        // 暂时不支持优先级继承
+        if requeue_pi {
+            return Err(SystemError::ENOSYS);
+        }
+
+        let key1 = Self::get_futex_key(
+            uaddr1,
+            flags.contains(FutexFlag::FLAGS_SHARED),
+            FutexAccess::FutexRead,
+        )?;
+        let key2 = Self::get_futex_key(uaddr2, flags.contains(FutexFlag::FLAGS_SHARED), {
+            match requeue_pi {
+                true => FutexAccess::FutexWrite,
+                false => FutexAccess::FutexRead,
+            }
+        })?;
+
+        if requeue_pi && key1 == key2 {
+            return Err(SystemError::EINVAL);
+        }
+
+        if likely(!cmpval.is_none()) {
+            let uval_reader =
+                UserBufferReader::new(uaddr1.as_ptr::<u32>(), core::mem::size_of::<u32>(), true)?;
+            let curval = uval_reader.read_one_from_user::<u32>(0)?;
+
+            // 判断是否满足条件
+            if *curval != cmpval.unwrap() {
+                return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
+            }
+        }
+
+        let mut futex_data_guard = FutexData::futex_map();
+        if !requeue_pi {
+            // 唤醒nr_wake个进程
+            let bucket_1_mut = futex_data_guard.get_mut(&key1).ok_or(SystemError::EINVAL)?;
+            let ret = bucket_1_mut.wake_up(key1.clone(), None, nr_wake as u32)?;
+            drop(bucket_1_mut);
+            // 将bucket1中最多nr_requeue个任务转移到bucket2
+            for _ in 0..nr_requeue {
+                let bucket_1_mut = futex_data_guard.get_mut(&key1).ok_or(SystemError::EINVAL)?;
+                let futex_q = bucket_1_mut.chain.pop_front();
+                match futex_q {
+                    Some(futex_q) => {
+                        let bucket_2_mut =
+                            futex_data_guard.get_mut(&key2).ok_or(SystemError::EINVAL)?;
+                        bucket_2_mut.chain.push_back(futex_q);
+                    }
+                    None => {
+                        break;
+                    }
+                }
+            }
+
+            return Ok(ret);
+        } else {
+            // 暂时不支持优先级继承
+            todo!()
+        }
+    }
+
+    /// ### 唤醒futex上的进程的同时进行一些操作
+    pub fn futex_wake_op(
+        uaddr1: VirtAddr,
+        flags: FutexFlag,
+        uaddr2: VirtAddr,
+        nr_wake: i32,
+        nr_wake2: i32,
+        op: i32,
+    ) -> Result<usize, SystemError> {
+        let key1 = Futex::get_futex_key(
+            uaddr1,
+            flags.contains(FutexFlag::FLAGS_SHARED),
+            FutexAccess::FutexRead,
+        )?;
+        let key2 = Futex::get_futex_key(
+            uaddr2,
+            flags.contains(FutexFlag::FLAGS_SHARED),
+            FutexAccess::FutexWrite,
+        )?;
+
+        let mut futex_data_guard = FutexData::futex_map();
+        let bucket1 = futex_data_guard.get_mut(&key1).ok_or(SystemError::EINVAL)?;
+        let mut wake_count = 0;
+
+        // 唤醒uaddr1中的进程
+        wake_count += bucket1.wake_up(key1, None, nr_wake as u32)?;
+
+        match Self::futex_atomic_op_inuser(op as u32, uaddr2) {
+            Ok(ret) => {
+                // 操作成功则唤醒uaddr2中的进程
+                if ret {
+                    let bucket2 = futex_data_guard.get_mut(&key2).ok_or(SystemError::EINVAL)?;
+                    wake_count += bucket2.wake_up(key2, None, nr_wake2 as u32)?;
+                }
+            }
+            Err(e) => {
+                // TODO:retry?
+                return Err(e);
+            }
+        }
+
+        Ok(wake_count)
+    }
+
+    fn get_futex_key(
+        uaddr: VirtAddr,
+        fshared: bool,
+        _access: FutexAccess,
+    ) -> Result<FutexKey, SystemError> {
+        let mut address = uaddr.data();
+
+        // 计算相对页的偏移量
+        let offset = address & (MMArch::PAGE_SIZE - 1);
+        // 判断内存对齐
+        if !(uaddr.data() & (core::mem::size_of::<u32>() - 1) == 0) {
+            return Err(SystemError::EINVAL);
+        }
+
+        // 目前address指向所在页面的起始地址
+        address -= offset;
+
+        // 若不是进程间共享的futex,则返回Private
+        if !fshared {
+            return Ok(FutexKey {
+                ptr: 0,
+                word: 0,
+                offset: offset as u32,
+                key: InnerFutexKey::Private(PrivateKey {
+                    address: address as u64,
+                    address_space: None,
+                }),
+            });
+        }
+
+        // 获取到地址所在地址空间
+        let address_space = AddressSpace::current()?;
+        // TODO: 判断是否为匿名映射,是匿名映射才返回PrivateKey
+        return Ok(FutexKey {
+            ptr: 0,
+            word: 0,
+            offset: offset as u32,
+            key: InnerFutexKey::Private(PrivateKey {
+                address: address as u64,
+                address_space: Some(Arc::downgrade(&address_space)),
+            }),
+        });
+
+        // 未实现共享内存机制,贡献内存部分应该通过inode构建SharedKey
+        // todo!("Shared memory not implemented");
+    }
+
+    pub fn futex_atomic_op_inuser(encoded_op: u32, uaddr: VirtAddr) -> Result<bool, SystemError> {
+        let op = FutexOP::from_bits((encoded_op & 0x70000000) >> 28).ok_or(SystemError::ENOSYS)?;
+        let cmp =
+            FutexOpCMP::from_bits((encoded_op & 0x0f000000) >> 24).ok_or(SystemError::ENOSYS)?;
+
+        let sign_extend32 = |value: u32, index: i32| {
+            let shift = (31 - index) as u8;
+            return (value << shift) >> shift;
+        };
+
+        let mut oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
+        let cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
+
+        if encoded_op & (FutexOP::FUTEX_OP_OPARG_SHIFT.bits() << 28) != 0 {
+            if oparg > 31 {
+                kwarn!(
+                    "futex_wake_op: pid:{} tries to shift op by {}; fix this program",
+                    ProcessManager::current_pcb().pid().data(),
+                    oparg
+                );
+
+                oparg &= 31;
+            }
+        }
+
+        // TODO: 这个汇编似乎是有问题的,目前不好测试
+        let old_val = Self::arch_futex_atomic_op_inuser(op, oparg, uaddr)?;
+
+        match cmp {
+            FutexOpCMP::FUTEX_OP_CMP_EQ => {
+                return Ok(cmparg == old_val);
+            }
+            FutexOpCMP::FUTEX_OP_CMP_NE => {
+                return Ok(cmparg != old_val);
+            }
+            FutexOpCMP::FUTEX_OP_CMP_LT => {
+                return Ok(cmparg < old_val);
+            }
+            FutexOpCMP::FUTEX_OP_CMP_LE => {
+                return Ok(cmparg <= old_val);
+            }
+            FutexOpCMP::FUTEX_OP_CMP_GE => {
+                return Ok(cmparg >= old_val);
+            }
+            FutexOpCMP::FUTEX_OP_CMP_GT => {
+                return Ok(cmparg > old_val);
+            }
+            _ => {
+                return Err(SystemError::ENOSYS);
+            }
+        }
+    }
+
+    /// ### 对futex进行操作
+    ///
+    /// 进入该方法会关闭中断保证修改的原子性,所以进入该方法前应确保中断锁已释放
+    ///
+    /// ### return uaddr原来的值
+    #[allow(unused_assignments)]
+    pub fn arch_futex_atomic_op_inuser(
+        op: FutexOP,
+        oparg: u32,
+        uaddr: VirtAddr,
+    ) -> Result<u32, SystemError> {
+        let guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
+
+        let reader =
+            UserBufferReader::new(uaddr.as_ptr::<u32>(), core::mem::size_of::<u32>(), true)?;
+
+        let oldval = reader.read_one_from_user::<u32>(0)?;
+
+        let atomic_addr = AtomicU64::new(uaddr.data() as u64);
+        // 这个指针是指向指针的指针
+        let ptr = atomic_addr.as_mut_ptr();
+        match op {
+            FutexOP::FUTEX_OP_SET => unsafe {
+                *((*ptr) as *mut u32) = oparg;
+            },
+            FutexOP::FUTEX_OP_ADD => unsafe {
+                *((*ptr) as *mut u32) += oparg;
+            },
+            FutexOP::FUTEX_OP_OR => unsafe {
+                *((*ptr) as *mut u32) |= oparg;
+            },
+            FutexOP::FUTEX_OP_ANDN => unsafe {
+                *((*ptr) as *mut u32) &= oparg;
+            },
+            FutexOP::FUTEX_OP_XOR => unsafe {
+                *((*ptr) as *mut u32) ^= oparg;
+            },
+            _ => return Err(SystemError::ENOSYS),
+        }
+
+        drop(guard);
+
+        Ok(*oldval)
+    }
+}
+
+#[no_mangle]
+unsafe extern "C" fn rs_futex_init() {
+    Futex::init();
+}

+ 3 - 0
kernel/src/libs/futex/mod.rs

@@ -0,0 +1,3 @@
+pub mod constant;
+pub mod futex;
+pub mod syscall;

+ 106 - 0
kernel/src/libs/futex/syscall.rs

@@ -0,0 +1,106 @@
+use crate::{
+    mm::VirtAddr,
+    syscall::{Syscall, SystemError},
+    time::TimeSpec,
+};
+
+use super::{constant::*, futex::Futex};
+
+impl Syscall {
+    pub fn do_futex(
+        uaddr: VirtAddr,
+        operation: FutexFlag,
+        val: u32,
+        timeout: Option<TimeSpec>,
+        uaddr2: VirtAddr,
+        val2: u32,
+        val3: u32,
+    ) -> Result<usize, SystemError> {
+        let cmd = FutexArg::from_bits(operation.bits() & FutexFlag::FUTEX_CMD_MASK.bits())
+            .ok_or(SystemError::ENOSYS)?;
+
+        let mut flags = FutexFlag::FLAGS_MATCH_NONE;
+
+        if !operation.contains(FutexFlag::FUTEX_PRIVATE_FLAG) {
+            flags.insert(FutexFlag::FLAGS_SHARED);
+        }
+
+        if operation.contains(FutexFlag::FUTEX_CLOCK_REALTIME) {
+            flags.insert(FutexFlag::FLAGS_CLOCKRT);
+            if cmd != FutexArg::FUTEX_WAIT_BITSET
+                && cmd != FutexArg::FUTEX_WAIT_REQUEUE_PI
+                && cmd != FutexArg::FUTEX_LOCK_PI2
+            {
+                return Err(SystemError::ENOSYS);
+            }
+        }
+
+        match cmd {
+            FutexArg::FUTEX_WAIT => {
+                return Futex::futex_wait(uaddr, flags, val, timeout, FUTEX_BITSET_MATCH_ANY);
+            }
+            FutexArg::FUTEX_WAIT_BITSET => {
+                return Futex::futex_wait(uaddr, flags, val, timeout, val3);
+            }
+            FutexArg::FUTEX_WAKE => {
+                return Futex::futex_wake(uaddr, flags, val, FUTEX_BITSET_MATCH_ANY);
+            }
+            FutexArg::FUTEX_WAKE_BITSET => {
+                return Futex::futex_wake(uaddr, flags, val, val3);
+            }
+            FutexArg::FUTEX_REQUEUE => {
+                return Futex::futex_requeue(
+                    uaddr,
+                    flags,
+                    uaddr2,
+                    val as i32,
+                    val2 as i32,
+                    None,
+                    false,
+                );
+            }
+            FutexArg::FUTEX_CMP_REQUEUE => {
+                return Futex::futex_requeue(
+                    uaddr,
+                    flags,
+                    uaddr2,
+                    val as i32,
+                    val2 as i32,
+                    Some(val3),
+                    false,
+                );
+            }
+            FutexArg::FUTEX_WAKE_OP => {
+                return Futex::futex_wake_op(
+                    uaddr,
+                    flags,
+                    uaddr2,
+                    val as i32,
+                    val2 as i32,
+                    val3 as i32,
+                );
+            }
+            FutexArg::FUTEX_LOCK_PI => {
+                todo!()
+            }
+            FutexArg::FUTEX_LOCK_PI2 => {
+                todo!()
+            }
+            FutexArg::FUTEX_UNLOCK_PI => {
+                todo!()
+            }
+            FutexArg::FUTEX_TRYLOCK_PI => {
+                todo!()
+            }
+            FutexArg::FUTEX_WAIT_REQUEUE_PI => {
+                todo!()
+            }
+            FutexArg::FUTEX_CMP_REQUEUE_PI => {
+                todo!()
+            }
+            _ => {
+                return Err(SystemError::ENOSYS);
+            }
+        }
+    }
+}

+ 2 - 0
kernel/src/libs/mod.rs

@@ -21,4 +21,6 @@ pub mod spinlock;
 pub mod vec_cursor;
 #[macro_use]
 pub mod volatile;
+pub mod futex;
+pub mod rand;
 pub mod wait_queue;

+ 7 - 0
kernel/src/libs/rand.rs

@@ -0,0 +1,7 @@
+bitflags! {
+    pub struct GRandFlags: u8{
+        const GRND_NONBLOCK = 0x0001;
+        const GRND_RANDOM = 0x0002;
+        const GRND_INSECURE = 0x0004;
+    }
+}

+ 2 - 0
kernel/src/main.c

@@ -38,6 +38,7 @@ extern void rs_kthread_init();
 extern void rs_init_intertrait();
 extern void rs_init_before_mem_init();
 extern int rs_setup_arch();
+extern void rs_futex_init();
 extern int rs_hpet_init();
 extern int rs_hpet_enable();
 extern int rs_tsc_init();
@@ -152,6 +153,7 @@ void system_initialize()
     smp_init();
 
     io_mfence();
+    rs_futex_init();
     cli();
     rs_hpet_init();
     rs_hpet_enable();

+ 20 - 2
kernel/src/mm/ucontext.rs

@@ -706,7 +706,7 @@ impl UserMappings {
     /// 请注意,在调用本函数之前,必须先确定region所在范围内没有VMA。
     fn reserve_hole(&mut self, region: &VirtRegion) {
         let prev_hole: Option<(&VirtAddr, &mut usize)> =
-            self.vm_holes.range_mut(..region.start()).next_back();
+            self.vm_holes.range_mut(..=region.start()).next_back();
 
         if let Some((prev_hole_vaddr, prev_hole_size)) = prev_hole {
             let prev_hole_end = prev_hole_vaddr.add(*prev_hole_size);
@@ -946,6 +946,8 @@ pub struct VMA {
     /// VMA所属的用户地址空间
     user_address_space: Option<Weak<AddressSpace>>,
     self_ref: Weak<LockedVMA>,
+
+    provider: Provider,
 }
 
 impl core::hash::Hash for VMA {
@@ -956,6 +958,12 @@ impl core::hash::Hash for VMA {
     }
 }
 
+/// 描述不同类型的内存提供者或资源
+#[derive(Debug)]
+pub enum Provider {
+    Allocated, // TODO:其他
+}
+
 #[allow(dead_code)]
 impl VMA {
     pub fn region(&self) -> &VirtRegion {
@@ -974,6 +982,7 @@ impl VMA {
             mapped: self.mapped,
             user_address_space: self.user_address_space.clone(),
             self_ref: self.self_ref.clone(),
+            provider: Provider::Allocated,
         };
     }
 
@@ -1019,8 +1028,15 @@ impl VMA {
     ///
     /// - `prot_flags` 要检查的标志位
     pub fn can_have_flags(&self, prot_flags: ProtFlags) -> bool {
-        return (self.flags.has_write() || !prot_flags.contains(ProtFlags::PROT_WRITE))
+        let is_downgrade = (self.flags.has_write() || !prot_flags.contains(ProtFlags::PROT_WRITE))
             && (self.flags.has_execute() || !prot_flags.contains(ProtFlags::PROT_EXEC));
+
+        match self.provider {
+            Provider::Allocated { .. } => true,
+
+            #[allow(unreachable_patterns)]
+            _ => is_downgrade,
+        }
     }
 
     /// 把物理地址映射到虚拟地址
@@ -1070,6 +1086,7 @@ impl VMA {
             mapped: true,
             user_address_space: None,
             self_ref: Weak::default(),
+            provider: Provider::Allocated,
         });
         return Ok(r);
     }
@@ -1118,6 +1135,7 @@ impl VMA {
             mapped: true,
             user_address_space: None,
             self_ref: Weak::default(),
+            provider: Provider::Allocated,
         });
         drop(flusher);
         // kdebug!("VMA::zeroed: flusher dropped");

+ 12 - 0
kernel/src/process/c_adapter.rs

@@ -36,16 +36,25 @@ pub extern "C" fn rs_current_pcb_pid() -> u32 {
 
 #[no_mangle]
 pub extern "C" fn rs_current_pcb_preempt_count() -> u32 {
+    if unsafe { !__PROCESS_MANAGEMENT_INIT_DONE } {
+        return 0;
+    }
     return ProcessManager::current_pcb().preempt_count() as u32;
 }
 
 #[no_mangle]
 pub extern "C" fn rs_current_pcb_flags() -> u32 {
+    if unsafe { !__PROCESS_MANAGEMENT_INIT_DONE } {
+        return 0;
+    }
     return ProcessManager::current_pcb().flags().bits() as u32;
 }
 
 #[no_mangle]
 pub extern "C" fn rs_current_pcb_thread_rbp() -> u64 {
+    if unsafe { !__PROCESS_MANAGEMENT_INIT_DONE } {
+        return 0;
+    }
     return ProcessManager::current_pcb().arch_info_irqsave().rbp() as u64;
 }
 
@@ -61,5 +70,8 @@ pub extern "C" fn rs_preempt_enable() {
 
 #[no_mangle]
 pub extern "C" fn rs_process_do_exit(exit_code: usize) -> usize {
+    if unsafe { !__PROCESS_MANAGEMENT_INIT_DONE } {
+        return 0;
+    }
     ProcessManager::exit(exit_code);
 }

+ 252 - 63
kernel/src/process/fork.rs

@@ -1,9 +1,13 @@
 use alloc::{string::ToString, sync::Arc};
 
 use crate::{
-    arch::interrupt::TrapFrame, filesystem::procfs::procfs_register_pid,
-    ipc::signal::flush_signal_handlers, libs::rwlock::RwLock, process::ProcessFlags,
-    syscall::SystemError,
+    arch::interrupt::TrapFrame,
+    filesystem::procfs::procfs_register_pid,
+    ipc::signal::flush_signal_handlers,
+    libs::rwlock::RwLock,
+    mm::VirtAddr,
+    process::ProcessFlags,
+    syscall::{user_access::UserBufferWriter, SystemError},
 };
 
 use super::{
@@ -13,21 +17,116 @@ use super::{
 
 bitflags! {
     /// 进程克隆标志
-    pub struct CloneFlags: u32 {
+    pub struct CloneFlags: u64 {
+        /// 在进程间共享虚拟内存空间
+        const CLONE_VM = 0x00000100;
         /// 在进程间共享文件系统信息
-        const CLONE_FS = (1 << 0);
-        /// 克隆时,与父进程共享信号结构体
-        const CLONE_SIGNAL = (1 << 1);
+        const CLONE_FS = 0x00000200;
+        /// 共享打开的文件
+        const CLONE_FILES = 0x00000400;
         /// 克隆时,与父进程共享信号处理结构体
-        const CLONE_SIGHAND = (1 << 2);
-        /// 克隆时,将原本被设置为SIG_IGNORE的信号,设置回SIG_DEFAULT
-        const CLONE_CLEAR_SIGHAND = (1 << 3);
-        /// 在进程间共享虚拟内存空间
-        const CLONE_VM = (1 << 4);
+        const CLONE_SIGHAND = 0x00000800;
+        /// 返回进程的文件描述符
+        const CLONE_PIDFD = 0x00001000;
+        /// 使克隆对象成为父进程的跟踪对象
+        const CLONE_PTRACE = 0x00002000;
+        /// 在执行 exec() 或 _exit() 之前挂起父进程的执行
+        const CLONE_VFORK = 0x00004000;
+        /// 使克隆对象的父进程为调用进程的父进程
+        const CLONE_PARENT = 0x00008000;
         /// 拷贝线程
-        const CLONE_THREAD = (1 << 5);
-        /// 共享打开的文件
-        const CLONE_FILES = (1 << 6);
+        const CLONE_THREAD = 0x00010000;
+        /// 创建一个新的命名空间,其中包含独立的文件系统挂载点层次结构。
+        const CLONE_NEWNS =	0x00020000;
+        /// 与父进程共享 System V 信号量。
+        const CLONE_SYSVSEM = 0x00040000;
+        /// 设置其线程本地存储
+        const CLONE_SETTLS = 0x00080000;
+        /// 设置partent_tid地址为子进程线程 ID
+        const CLONE_PARENT_SETTID = 0x00100000;
+        /// 在子进程中设置一个清除线程 ID 的用户空间地址
+        const CLONE_CHILD_CLEARTID = 0x00200000;
+        /// 创建一个新线程,将其设置为分离状态
+        const CLONE_DETACHED = 0x00400000;
+        /// 使其在创建者进程或线程视角下成为无法跟踪的。
+        const CLONE_UNTRACED = 0x00800000;
+        /// 设置其子进程线程 ID
+        const CLONE_CHILD_SETTID = 0x01000000;
+        /// 将其放置在一个新的 cgroup 命名空间中
+        const CLONE_NEWCGROUP = 0x02000000;
+        /// 将其放置在一个新的 UTS 命名空间中
+        const CLONE_NEWUTS = 0x04000000;
+        /// 将其放置在一个新的 IPC 命名空间中
+        const CLONE_NEWIPC = 0x08000000;
+        /// 将其放置在一个新的用户命名空间中
+        const CLONE_NEWUSER = 0x10000000;
+        /// 将其放置在一个新的 PID 命名空间中
+        const CLONE_NEWPID = 0x20000000;
+        /// 将其放置在一个新的网络命名空间中
+        const CLONE_NEWNET = 0x40000000;
+        /// 在新的 I/O 上下文中运行它
+        const CLONE_IO = 0x80000000;
+        /// 克隆时,与父进程共享信号结构体
+        const CLONE_SIGNAL = 0x00010000 | 0x00000800;
+        /// 克隆时,将原本被设置为SIG_IGNORE的信号,设置回SIG_DEFAULT
+        const CLONE_CLEAR_SIGHAND = 0x100000000;
+    }
+}
+
+/// ## clone与clone3系统调用的参数载体
+///
+/// 因为这两个系统调用的参数很多,所以有这样一个载体更灵活
+///
+/// 仅仅作为参数传递
+#[derive(Debug, Clone, Copy)]
+pub struct KernelCloneArgs {
+    pub flags: CloneFlags,
+
+    // 下列属性均来自用户空间
+    pub pidfd: VirtAddr,
+    pub child_tid: VirtAddr,
+    pub parent_tid: VirtAddr,
+    pub set_tid: VirtAddr,
+
+    pub exit_signal: i32,
+
+    pub stack: usize,
+    // clone3用到
+    pub stack_size: usize,
+    pub tls: usize,
+
+    pub set_tid_size: usize,
+    pub cgroup: i32,
+
+    pub io_thread: bool,
+    pub kthread: bool,
+    pub idle: bool,
+    pub func: VirtAddr,
+    pub fn_arg: VirtAddr,
+    // cgrp 和 cset?
+}
+
+impl KernelCloneArgs {
+    pub fn new() -> Self {
+        let null_addr = VirtAddr::new(0);
+        Self {
+            flags: unsafe { CloneFlags::from_bits_unchecked(0) },
+            pidfd: null_addr,
+            child_tid: null_addr,
+            parent_tid: null_addr,
+            set_tid: null_addr,
+            exit_signal: 0,
+            stack: 0,
+            stack_size: 0,
+            tls: 0,
+            set_tid_size: 0,
+            cgroup: 0,
+            io_thread: false,
+            kthread: false,
+            idle: false,
+            func: null_addr,
+            fn_arg: null_addr,
+        }
     }
 }
 
@@ -56,53 +155,9 @@ impl ProcessManager {
         let name = current_pcb.basic().name().to_string();
         let pcb = ProcessControlBlock::new(name, new_kstack);
 
-        // 克隆架构相关信息
-        *pcb.arch_info() = current_pcb.arch_info_irqsave().clone();
-
-        // 为内核线程设置worker private字段。(也许由内核线程机制去做会更好?)
-        if current_pcb.flags().contains(ProcessFlags::KTHREAD) {
-            *pcb.worker_private() = Some(WorkerPrivate::KernelThread(KernelThreadPcbPrivate::new()))
-        }
-
-        // 拷贝标志位
-        ProcessManager::copy_flags(&clone_flags, &pcb).unwrap_or_else(|e| {
-            panic!(
-                "fork: Failed to copy flags from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
-                current_pcb.pid(), pcb.pid(), e
-            )
-        });
-
-        // 拷贝用户地址空间
-        ProcessManager::copy_mm(&clone_flags, &current_pcb, &pcb).unwrap_or_else(|e| {
-            panic!(
-                "fork: Failed to copy mm from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
-                current_pcb.pid(), pcb.pid(), e
-            )
-        });
-
-        // 拷贝文件描述符表
-        ProcessManager::copy_files(&clone_flags, &current_pcb, &pcb).unwrap_or_else(|e| {
-            panic!(
-                "fork: Failed to copy files from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
-                current_pcb.pid(), pcb.pid(), e
-            )
-        });
-
-        //拷贝信号相关数据
-        ProcessManager::copy_sighand(&clone_flags, &current_pcb, &pcb).unwrap_or_else(|e| {
-            panic!(
-                "fork: Failed to copy sighands from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
-                current_pcb.pid(), pcb.pid(), e
-            )
-        });
-
-        // 拷贝线程
-        ProcessManager::copy_thread(&clone_flags, &current_pcb, &pcb, &current_trapframe).unwrap_or_else(|e| {
-            panic!(
-                "fork: Failed to copy thread from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
-                current_pcb.pid(), pcb.pid(), e
-            )
-        });
+        let mut args = KernelCloneArgs::new();
+        args.flags = clone_flags;
+        Self::copy_process(&current_pcb, &pcb, args, current_trapframe)?;
 
         ProcessManager::add_pcb(pcb.clone());
 
@@ -168,7 +223,6 @@ impl ProcessManager {
             unsafe { new_pcb.basic_mut().set_user_vm(Some(old_address_space)) };
             return Ok(());
         }
-
         let new_address_space = old_address_space.write().try_clone().unwrap_or_else(|e| {
             panic!(
                 "copy_mm: Failed to clone address space of current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
@@ -215,4 +269,139 @@ impl ProcessManager {
         }
         return Ok(());
     }
+
+    /// 拷贝进程信息
+    ///
+    /// ## panic:
+    /// 某一步拷贝失败时会引发panic
+    /// 例如:copy_mm等失败时会触发panic
+    ///
+    /// ## 参数
+    ///
+    /// - clone_flags 标志位
+    /// - des_pcb 目标pcb
+    /// - src_pcb 拷贝源pcb
+    ///
+    /// ## return
+    /// - 发生错误时返回Err(SystemError)
+    pub fn copy_process(
+        current_pcb: &Arc<ProcessControlBlock>,
+        pcb: &Arc<ProcessControlBlock>,
+        clone_args: KernelCloneArgs,
+        current_trapframe: &mut TrapFrame,
+    ) -> Result<(), SystemError> {
+        let clone_flags = clone_args.flags;
+        // 不允许与不同namespace的进程共享根目录
+        if (clone_flags == (CloneFlags::CLONE_NEWNS | CloneFlags::CLONE_FS))
+            || clone_flags == (CloneFlags::CLONE_NEWUSER | CloneFlags::CLONE_FS)
+        {
+            return Err(SystemError::EINVAL);
+        }
+
+        // 线程组必须共享信号,分离线程只能在线程组内启动。
+        if clone_flags.contains(CloneFlags::CLONE_THREAD)
+            && !clone_flags.contains(CloneFlags::CLONE_SIGHAND)
+        {
+            return Err(SystemError::EINVAL);
+        }
+
+        // 共享信号处理器意味着共享vm。
+        // 线程组也意味着共享vm。阻止这种情况可以简化其他代码。
+        if clone_flags.contains(CloneFlags::CLONE_SIGHAND)
+            && !clone_flags.contains(CloneFlags::CLONE_VM)
+        {
+            return Err(SystemError::EINVAL);
+        }
+
+        // TODO: 处理CLONE_PARENT 与 SIGNAL_UNKILLABLE的情况
+
+        // 如果新进程使用不同的 pid 或 namespace,
+        // 则不允许它与分叉任务共享线程组。
+        if clone_flags.contains(CloneFlags::CLONE_THREAD) {
+            if clone_flags.contains(CloneFlags::CLONE_NEWUSER | CloneFlags::CLONE_NEWPID) {
+                return Err(SystemError::EINVAL);
+            }
+            // TODO: 判断新进程与当前进程namespace是否相同,不同则返回错误
+        }
+
+        // 如果新进程将处于不同的time namespace,
+        // 则不能让它共享vm或线程组。
+        if clone_flags.contains(CloneFlags::CLONE_THREAD | CloneFlags::CLONE_VM) {
+            // TODO: 判断time namespace,不同则返回错误
+        }
+
+        if clone_flags.contains(CloneFlags::CLONE_PIDFD)
+            && clone_flags.contains(CloneFlags::CLONE_DETACHED | CloneFlags::CLONE_THREAD)
+        {
+            return Err(SystemError::EINVAL);
+        }
+
+        // TODO: 克隆前应该锁信号处理,等待克隆完成后再处理
+
+        // 克隆架构相关
+        *pcb.arch_info() = current_pcb.arch_info_irqsave().clone();
+
+        // 为内核线程设置WorkerPrivate
+        if current_pcb.flags().contains(ProcessFlags::KTHREAD) {
+            *pcb.worker_private() =
+                Some(WorkerPrivate::KernelThread(KernelThreadPcbPrivate::new()));
+        }
+
+        // 设置clear_child_tid,在线程结束时将其置0以通知父进程
+        if clone_flags.contains(CloneFlags::CLONE_CHILD_CLEARTID) {
+            pcb.thread.write().clear_child_tid = Some(clone_args.child_tid);
+        }
+
+        // 设置child_tid,意味着子线程能够知道自己的id
+        if clone_flags.contains(CloneFlags::CLONE_CHILD_SETTID) {
+            pcb.thread.write().set_child_tid = Some(clone_args.child_tid);
+        }
+
+        // 将子进程/线程的id存储在用户态传进的地址中
+        if clone_flags.contains(CloneFlags::CLONE_PARENT_SETTID) {
+            let mut writer = UserBufferWriter::new(
+                clone_args.parent_tid.data() as *mut i32,
+                core::mem::size_of::<i32>(),
+                true,
+            )?;
+
+            writer.copy_one_to_user(&(pcb.pid().0 as i32), 0)?;
+        }
+
+        // 拷贝标志位
+        Self::copy_flags(&clone_flags, &pcb).unwrap_or_else(|e| {
+            panic!(
+                "fork: Failed to copy flags from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
+                current_pcb.pid(), pcb.pid(), e
+            )
+        });
+
+        // 拷贝用户地址空间
+        Self::copy_mm(&clone_flags, &current_pcb, &pcb).unwrap_or_else(|e| {
+            panic!(
+                "fork: Failed to copy mm from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
+                current_pcb.pid(), pcb.pid(), e
+            )
+        });
+
+        // 拷贝文件描述符表
+        Self::copy_files(&clone_flags, &current_pcb, &pcb).unwrap_or_else(|e| {
+            panic!(
+                "fork: Failed to copy files from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
+                current_pcb.pid(), pcb.pid(), e
+            )
+        });
+
+        // todo: 拷贝信号相关数据
+
+        // 拷贝线程
+        Self::copy_thread(&current_pcb, &pcb, clone_args,&current_trapframe).unwrap_or_else(|e| {
+            panic!(
+                "fork: Failed to copy thread from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
+                current_pcb.pid(), pcb.pid(), e
+            )
+        });
+
+        Ok(())
+    }
 }

+ 5 - 3
kernel/src/process/kthread.rs

@@ -282,9 +282,11 @@ impl KernelThreadMechanism {
             // 初始化kthreadd
             let closure = KernelThreadClosure::EmptyClosure((Box::new(Self::kthread_daemon), ()));
             let info = KernelThreadCreateInfo::new(closure, "kthreadd".to_string());
-            let kthreadd_pid: Pid =
-                Self::__inner_create(&info, CloneFlags::CLONE_FS | CloneFlags::CLONE_SIGNAL)
-                    .expect("Failed to create kthread daemon");
+            let kthreadd_pid: Pid = Self::__inner_create(
+                &info,
+                CloneFlags::CLONE_VM | CloneFlags::CLONE_FS | CloneFlags::CLONE_SIGNAL,
+            )
+            .expect("Failed to create kthread daemon");
 
             let pcb = ProcessManager::find(kthreadd_pid).unwrap();
             ProcessManager::wakeup(&pcb).expect("Failed to wakeup kthread daemon");

+ 95 - 27
kernel/src/process/mod.rs

@@ -1,5 +1,6 @@
 use core::{
     hash::{Hash, Hasher},
+    hint::spin_loop,
     intrinsics::{likely, unlikely},
     mem::ManuallyDrop,
     sync::atomic::{compiler_fence, AtomicBool, AtomicI32, AtomicIsize, AtomicUsize, Ordering},
@@ -29,6 +30,10 @@ use crate::{
     libs::{
         align::AlignedBox,
         casting::DowncastArc,
+        futex::{
+            constant::{FutexFlag, FUTEX_BITSET_MATCH_ANY},
+            futex::Futex,
+        },
         rwlock::{RwLock, RwLockReadGuard, RwLockWriteGuard},
         spinlock::{SpinLock, SpinLockGuard},
         wait_queue::WaitQueue,
@@ -36,11 +41,12 @@ use crate::{
     mm::{percpu::PerCpuVar, set_INITIAL_PROCESS_ADDRESS_SPACE, ucontext::AddressSpace, VirtAddr},
     net::socket::SocketInode,
     sched::{
+        completion::Completion,
         core::{sched_enqueue, CPU_EXECUTING},
         SchedPolicy, SchedPriority,
     },
     smp::kick_cpu,
-    syscall::{Syscall, SystemError},
+    syscall::{user_access::clear_user, Syscall, SystemError},
 };
 
 use self::kthread::WorkerPrivate;
@@ -116,6 +122,12 @@ impl ProcessManager {
 
     /// 获取当前进程的pcb
     pub fn current_pcb() -> Arc<ProcessControlBlock> {
+        if unlikely(unsafe { !__PROCESS_MANAGEMENT_INIT_DONE }) {
+            kerror!("unsafe__PROCESS_MANAGEMENT_INIT_DONE == false");
+            loop {
+                spin_loop();
+            }
+        }
         return ProcessControlBlock::arch_current_pcb();
     }
 
@@ -252,7 +264,7 @@ impl ProcessManager {
         assert_eq!(
             CurrentIrqArch::is_irq_enabled(),
             false,
-            "interrupt must be disabled before enter ProcessManager::mark_sleep()"
+            "interrupt must be disabled before enter ProcessManager::mark_stop()"
         );
 
         let pcb = ProcessManager::current_pcb();
@@ -306,9 +318,31 @@ impl ProcessManager {
             .write()
             .set_state(ProcessState::Exited(exit_code));
         pcb.wait_queue.wakeup(Some(ProcessState::Blocked(true)));
+
+        // 进行进程退出后的工作
+        let thread = pcb.thread.write();
+        if let Some(addr) = thread.set_child_tid {
+            unsafe { clear_user(addr, core::mem::size_of::<i32>()).expect("clear tid failed") };
+        }
+
+        if let Some(addr) = thread.clear_child_tid {
+            if Arc::strong_count(&pcb.basic().user_vm().expect("User VM Not found")) > 1 {
+                let _ =
+                    Futex::futex_wake(addr, FutexFlag::FLAGS_MATCH_NONE, 1, FUTEX_BITSET_MATCH_ANY);
+            }
+            unsafe { clear_user(addr, core::mem::size_of::<i32>()).expect("clear tid failed") };
+        }
+
+        // 如果是vfork出来的进程,则需要处理completion
+        if thread.vfork_done.is_some() {
+            thread.vfork_done.as_ref().unwrap().complete_all();
+        }
+        drop(thread);
+        unsafe { pcb.basic_mut().set_user_vm(None) };
         drop(pcb);
         ProcessManager::exit_notify();
         drop(irq_guard);
+
         sched();
         loop {}
     }
@@ -316,15 +350,21 @@ impl ProcessManager {
     pub unsafe fn release(pid: Pid) {
         let pcb = ProcessManager::find(pid);
         if !pcb.is_none() {
-            let pcb = pcb.unwrap();
+            // let pcb = pcb.unwrap();
             // 判断该pcb是否在全局没有任何引用
-            if Arc::strong_count(&pcb) <= 1 {
-                drop(pcb);
-                ALL_PROCESS.lock().as_mut().unwrap().remove(&pid);
-            } else {
-                // 如果不为1就panic
-                panic!("pcb is still referenced");
-            }
+            // TODO: 当前,pcb的Arc指针存在泄露问题,引用计数不正确,打算在接下来实现debug专用的Arc,方便调试,然后解决这个bug。
+            //          因此目前暂时注释掉,使得能跑
+            // if Arc::strong_count(&pcb) <= 2 {
+            //     drop(pcb);
+            //     ALL_PROCESS.lock().as_mut().unwrap().remove(&pid);
+            // } else {
+            //     // 如果不为1就panic
+            //     let msg = format!("pcb '{:?}' is still referenced, strong count={}",pcb.pid(),  Arc::strong_count(&pcb));
+            //     kerror!("{}", msg);
+            //     panic!()
+            // }
+
+            ALL_PROCESS.lock().as_mut().unwrap().remove(&pid);
         }
     }
 
@@ -482,10 +522,13 @@ pub struct ProcessControlBlock {
     parent_pcb: RwLock<Weak<ProcessControlBlock>>,
 
     /// 子进程链表
-    children: RwLock<HashMap<Pid, Arc<ProcessControlBlock>>>,
+    children: RwLock<Vec<Pid>>,
 
     /// 等待队列
     wait_queue: WaitQueue,
+
+    /// 线程信息
+    thread: RwLock<ThreadInfo>,
 }
 
 impl ProcessControlBlock {
@@ -545,20 +588,26 @@ impl ProcessControlBlock {
             sig_info: RwLock::new(ProcessSignalInfo::default()),
             sig_struct: SpinLock::new(SignalStruct::default()),
             parent_pcb: RwLock::new(ppcb),
-            children: RwLock::new(HashMap::new()),
+            children: RwLock::new(Vec::new()),
             wait_queue: WaitQueue::INIT,
+            thread: RwLock::new(ThreadInfo::new()),
         };
 
         let pcb = Arc::new(pcb);
 
         // 设置进程的arc指针到内核栈的最低地址处
-        unsafe { pcb.kernel_stack.write().set_pcb(Arc::clone(&pcb)).unwrap() };
+        unsafe {
+            pcb.kernel_stack
+                .write()
+                .set_pcb(Arc::downgrade(&pcb))
+                .unwrap()
+        };
 
         // 将当前pcb加入父进程的子进程哈希表中
         if pcb.pid() > Pid(1) {
             if let Some(ppcb_arc) = pcb.parent_pcb.read().upgrade() {
                 let mut children = ppcb_arc.children.write();
-                children.insert(pcb.pid(), pcb.clone());
+                children.push(pcb.pid());
             } else {
                 panic!("parent pcb is None");
             }
@@ -700,11 +749,11 @@ impl ProcessControlBlock {
     unsafe fn adopt_childen(&self) -> Result<(), SystemError> {
         match ProcessManager::find(Pid(1)) {
             Some(init_pcb) => {
-                let mut childen_guard = self.children.write();
+                let childen_guard = self.children.write();
                 let mut init_childen_guard = init_pcb.children.write();
 
-                childen_guard.drain().for_each(|(pid, child)| {
-                    init_childen_guard.insert(pid, child);
+                childen_guard.iter().for_each(|pid| {
+                    init_childen_guard.push(*pid);
                 });
 
                 return Ok(());
@@ -747,12 +796,31 @@ impl Drop for ProcessControlBlock {
             .unwrap_or_else(|e| panic!("procfs_unregister_pid failed: error: {e:?}"));
 
         if let Some(ppcb) = self.parent_pcb.read().upgrade() {
-            ppcb.children.write().remove(&self.pid());
+            ppcb.children.write().drain_filter(|pid| *pid == self.pid());
         }
+    }
+}
+
+/// 线程信息
+#[derive(Debug)]
+pub struct ThreadInfo {
+    // 来自用户空间记录用户线程id的地址,在该线程结束时将该地址置0以通知父进程
+    clear_child_tid: Option<VirtAddr>,
+    set_child_tid: Option<VirtAddr>,
 
-        unsafe { ProcessManager::release(self.pid()) };
+    vfork_done: Option<Arc<Completion>>,
+}
+
+impl ThreadInfo {
+    pub fn new() -> Self {
+        Self {
+            clear_child_tid: None,
+            set_child_tid: None,
+            vfork_done: None,
+        }
     }
 }
+
 /// 进程的基本信息
 ///
 /// 这个结构体保存进程的基本信息,主要是那些不会随着进程的运行而经常改变的信息。
@@ -994,9 +1062,9 @@ impl KernelStack {
         return VirtAddr::new(self.stack.as_ref().unwrap().as_ptr() as usize + Self::SIZE);
     }
 
-    pub unsafe fn set_pcb(&mut self, pcb: Arc<ProcessControlBlock>) -> Result<(), SystemError> {
-        // 将一个Arc<ProcessControlBlock>放到内核栈的最低地址处
-        let p: *const ProcessControlBlock = Arc::into_raw(pcb);
+    pub unsafe fn set_pcb(&mut self, pcb: Weak<ProcessControlBlock>) -> Result<(), SystemError> {
+        // 将一个Weak<ProcessControlBlock>放到内核栈的最低地址处
+        let p: *const ProcessControlBlock = Weak::into_raw(pcb);
         let stack_bottom_ptr = self.start_address().data() as *mut *const ProcessControlBlock;
 
         // 如果内核栈的最低地址处已经有了一个pcb,那么,这里就不再设置,直接返回错误
@@ -1021,10 +1089,10 @@ impl KernelStack {
         }
 
         // 为了防止内核栈的pcb指针被释放,这里需要将其包装一下,使得Arc的drop不会被调用
-        let arc_wrapper: ManuallyDrop<Arc<ProcessControlBlock>> =
-            ManuallyDrop::new(Arc::from_raw(p));
+        let weak_wrapper: ManuallyDrop<Weak<ProcessControlBlock>> =
+            ManuallyDrop::new(Weak::from_raw(p));
 
-        let new_arc: Arc<ProcessControlBlock> = Arc::clone(&arc_wrapper);
+        let new_arc: Arc<ProcessControlBlock> = weak_wrapper.upgrade()?;
         return Some(new_arc);
     }
 }
@@ -1032,8 +1100,8 @@ impl KernelStack {
 impl Drop for KernelStack {
     fn drop(&mut self) {
         if !self.stack.is_none() {
-            let pcb_ptr: Arc<ProcessControlBlock> = unsafe {
-                Arc::from_raw(self.stack.as_ref().unwrap().as_ptr() as *const ProcessControlBlock)
+            let pcb_ptr: Weak<ProcessControlBlock> = unsafe {
+                Weak::from_raw(self.stack.as_ref().unwrap().as_ptr() as *const ProcessControlBlock)
             };
             drop(pcb_ptr);
         }

+ 102 - 7
kernel/src/process/syscall.rs

@@ -1,13 +1,24 @@
 use core::ffi::c_void;
 
-use alloc::{string::String, vec::Vec};
+use alloc::{
+    string::{String, ToString},
+    sync::Arc,
+    vec::Vec,
+};
 
-use super::{abi::WaitOption, fork::CloneFlags, Pid, ProcessManager, ProcessState};
+use super::{
+    abi::WaitOption,
+    fork::{CloneFlags, KernelCloneArgs},
+    KernelStack, Pid, ProcessManager, ProcessState,
+};
 use crate::{
     arch::{interrupt::TrapFrame, sched::sched, CurrentIrqArch},
     exception::InterruptArch,
-    filesystem::vfs::MAX_PATHLEN,
+    filesystem::{procfs::procfs_register_pid, vfs::MAX_PATHLEN},
+    include::bindings::bindings::verify_area,
+    mm::VirtAddr,
     process::ProcessControlBlock,
+    sched::completion::Completion,
     syscall::{
         user_access::{
             check_and_clone_cstr, check_and_clone_cstr_array, UserBufferReader, UserBufferWriter,
@@ -42,6 +53,11 @@ impl Syscall {
         //     argv,
         //     envp
         // );
+        // kdebug!(
+        //     "before execve: strong count: {}",
+        //     Arc::strong_count(&ProcessManager::current_pcb())
+        // );
+
         if path.is_null() {
             return Err(SystemError::EINVAL);
         }
@@ -66,6 +82,10 @@ impl Syscall {
         // 关闭设置了O_CLOEXEC的文件描述符
         let fd_table = ProcessManager::current_pcb().fd_table();
         fd_table.write().close_on_exec();
+        // kdebug!(
+        //     "after execve: strong count: {}",
+        //     Arc::strong_count(&ProcessManager::current_pcb())
+        // );
 
         return Ok(());
     }
@@ -95,12 +115,13 @@ impl Syscall {
 
         if pid > 0 {
             let pid = Pid(pid as usize);
-            let child_pcb = rd_childen.get(&pid).ok_or(SystemError::ECHILD)?.clone();
+            let child_pcb = ProcessManager::find(pid).ok_or(SystemError::ECHILD)?;
             drop(rd_childen);
 
             loop {
+                let state = child_pcb.sched_info().state();
                 // 获取退出码
-                match child_pcb.sched_info().state() {
+                match state {
                     ProcessState::Runnable => {
                         if options.contains(WaitOption::WNOHANG)
                             || options.contains(WaitOption::WNOWAIT)
@@ -123,12 +144,16 @@ impl Syscall {
                         }
                     }
                     ProcessState::Exited(status) => {
+                        // kdebug!("wait4: child exited, pid: {:?}, status: {status}\n", pid);
                         if !wstatus.is_null() {
                             wstatus_buf.copy_one_to_user(
-                                &(status | WaitOption::WEXITED.bits() as usize),
+                                &(status as u32 | WaitOption::WEXITED.bits()),
                                 0,
                             )?;
                         }
+                        drop(child_pcb);
+                        // kdebug!("wait4: to release {pid:?}");
+                        unsafe { ProcessManager::release(pid) };
                         return Ok(pid.into());
                     }
                 };
@@ -147,7 +172,8 @@ impl Syscall {
         } else {
             // 等待任意子进程(这两)
             let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
-            for (pid, pcb) in rd_childen.iter() {
+            for pid in rd_childen.iter() {
+                let pcb = ProcessManager::find(*pid).ok_or(SystemError::ECHILD)?;
                 if pcb.sched_info().state().is_exited() {
                     if !wstatus.is_null() {
                         wstatus_buf.copy_one_to_user(&0, 0)?;
@@ -200,4 +226,73 @@ impl Syscall {
         let current_pcb = ProcessManager::current_pcb();
         return Ok(current_pcb.basic().ppid());
     }
+
+    pub fn clone(
+        current_trapframe: &mut TrapFrame,
+        clone_args: KernelCloneArgs,
+    ) -> Result<usize, SystemError> {
+        let flags = clone_args.flags;
+
+        let vfork = Arc::new(Completion::new());
+
+        if flags.contains(CloneFlags::CLONE_PIDFD)
+            && flags.contains(CloneFlags::CLONE_PARENT_SETTID)
+        {
+            return Err(SystemError::EINVAL);
+        }
+
+        let current_pcb = ProcessManager::current_pcb();
+        let new_kstack = KernelStack::new()?;
+        let name = current_pcb.basic().name().to_string();
+        let pcb = ProcessControlBlock::new(name, new_kstack);
+        // 克隆pcb
+        ProcessManager::copy_process(&current_pcb, &pcb, clone_args, current_trapframe)?;
+        ProcessManager::add_pcb(pcb.clone());
+
+        // 向procfs注册进程
+        procfs_register_pid(pcb.pid()).unwrap_or_else(|e| {
+            panic!(
+                "fork: Failed to register pid to procfs, pid: [{:?}]. Error: {:?}",
+                pcb.pid(),
+                e
+            )
+        });
+
+        if flags.contains(CloneFlags::CLONE_VFORK) {
+            pcb.thread.write().vfork_done = Some(vfork.clone());
+        }
+
+        if pcb.thread.read().set_child_tid.is_some() {
+            let addr = pcb.thread.read().set_child_tid.unwrap();
+            let mut writer =
+                UserBufferWriter::new(addr.as_ptr::<i32>(), core::mem::size_of::<i32>(), true)?;
+            writer.copy_one_to_user(&(pcb.pid().data() as i32), 0)?;
+        }
+
+        ProcessManager::wakeup(&pcb).unwrap_or_else(|e| {
+            panic!(
+                "fork: Failed to wakeup new process, pid: [{:?}]. Error: {:?}",
+                pcb.pid(),
+                e
+            )
+        });
+
+        if flags.contains(CloneFlags::CLONE_VFORK) {
+            // 等待子进程结束或者exec;
+            vfork.wait_for_completion_interruptible()?;
+        }
+
+        return Ok(pcb.pid().0);
+    }
+
+    /// 设置线程地址
+    pub fn set_tid_address(ptr: usize) -> Result<usize, SystemError> {
+        if !unsafe { verify_area(ptr as u64, core::mem::size_of::<i32>() as u64) } {
+            return Err(SystemError::EFAULT);
+        }
+
+        let pcb = ProcessManager::current_pcb();
+        pcb.thread.write().clear_child_tid = Some(VirtAddr::new(ptr));
+        Ok(pcb.pid.0)
+    }
 }

+ 1 - 1
kernel/src/sched/completion.rs

@@ -97,7 +97,7 @@ impl Completion {
     }
 
     /// @brief 永久标记done为Complete_All,并从wait_queue中删除所有节点
-    pub fn complete_all(&mut self) {
+    pub fn complete_all(&self) {
         let mut inner = self.inner.lock_irqsave();
         inner.done = COMPLETE_ALL;
         inner.wait_queue.wakeup_all(None);

+ 104 - 6
kernel/src/syscall/mod.rs

@@ -3,7 +3,10 @@ use core::{
     sync::atomic::{AtomicBool, Ordering},
 };
 
-use crate::kdebug;
+use crate::{
+    libs::{futex::constant::FutexFlag, rand::GRandFlags},
+    process::fork::KernelCloneArgs,
+};
 
 use num_traits::{FromPrimitive, ToPrimitive};
 
@@ -21,7 +24,7 @@ use crate::{
     libs::align::page_align_up,
     mm::{verify_area, MemoryManagementArch, VirtAddr},
     net::syscall::SockAddr,
-    process::Pid,
+    process::{fork::CloneFlags, Pid},
     time::{
         syscall::{PosixTimeZone, PosixTimeval},
         TimeSpec,
@@ -372,7 +375,7 @@ pub const SYS_BIND: usize = 49;
 pub const SYS_LISTEN: usize = 50;
 pub const SYS_GETSOCKNAME: usize = 51;
 pub const SYS_GETPEERNAME: usize = 52;
-
+pub const SYS_SOCKET_PAIR: usize = 53;
 pub const SYS_SETSOCKOPT: usize = 54;
 pub const SYS_GETSOCKOPT: usize = 55;
 
@@ -536,7 +539,6 @@ impl Syscall {
                 Self::lseek(fd, w)
             }
             SYS_IOCTL => {
-                kdebug!("SYS_IOCTL");
                 let fd = args[0];
                 let cmd = args[1];
                 let data = args[2];
@@ -943,7 +945,7 @@ impl Syscall {
             SYS_MUNMAP => {
                 let addr = args[0];
                 let len = page_align_up(args[1]);
-                if addr & MMArch::PAGE_SIZE != 0 {
+                if addr & (MMArch::PAGE_SIZE - 1) != 0 {
                     // The addr argument is not a multiple of the page size
                     Err(SystemError::EINVAL)
                 } else {
@@ -953,7 +955,7 @@ impl Syscall {
             SYS_MPROTECT => {
                 let addr = args[0];
                 let len = page_align_up(args[1]);
-                if addr & MMArch::PAGE_SIZE != 0 {
+                if addr & (MMArch::PAGE_SIZE - 1) != 0 {
                     // The addr argument is not a multiple of the page size
                     Err(SystemError::EINVAL)
                 } else {
@@ -1023,6 +1025,102 @@ impl Syscall {
                 Self::mknod(path as *const i8, flags, DeviceNumber::from(dev_t))
             }
 
+            SYS_CLONE => {
+                let parent_tid = VirtAddr::new(args[2]);
+                let child_tid = VirtAddr::new(args[3]);
+
+                // 地址校验
+                verify_area(parent_tid, core::mem::size_of::<i32>())?;
+                verify_area(child_tid, core::mem::size_of::<i32>())?;
+
+                let mut clone_args = KernelCloneArgs::new();
+                clone_args.flags = CloneFlags::from_bits_truncate(args[0] as u64);
+                clone_args.stack = args[1];
+                clone_args.parent_tid = parent_tid;
+                clone_args.child_tid = child_tid;
+                clone_args.tls = args[4];
+                Self::clone(frame, clone_args)
+            }
+
+            SYS_FUTEX => {
+                let uaddr = VirtAddr::new(args[0]);
+                let operation = FutexFlag::from_bits(args[1] as u32).ok_or(SystemError::ENOSYS)?;
+                let val = args[2] as u32;
+                let utime = args[3];
+                let uaddr2 = VirtAddr::new(args[4]);
+                let val3 = args[5] as u32;
+
+                verify_area(uaddr, core::mem::size_of::<u32>())?;
+                verify_area(uaddr2, core::mem::size_of::<u32>())?;
+
+                let mut timespec = None;
+                if utime != 0 && operation.contains(FutexFlag::FLAGS_HAS_TIMEOUT) {
+                    let reader = UserBufferReader::new(
+                        utime as *const TimeSpec,
+                        core::mem::size_of::<TimeSpec>(),
+                        true,
+                    )?;
+
+                    timespec = Some(reader.read_one_from_user::<TimeSpec>(0)?.clone());
+                }
+
+                Self::do_futex(uaddr, operation, val, timespec, uaddr2, utime as u32, val3)
+            }
+
+            SYS_WRITEV => Self::writev(args[0] as i32, args[1], args[2]),
+
+            SYS_ARCH_PRCTL => Self::arch_prctl(args[0], args[1]),
+
+            SYS_SET_TID_ADDR => Self::set_tid_address(args[0]),
+
+            SYS_STAT => {
+                let path: &CStr = unsafe { CStr::from_ptr(args[0] as *const c_char) };
+                let path: Result<&str, core::str::Utf8Error> = path.to_str();
+                let res = if path.is_err() {
+                    Err(SystemError::EINVAL)
+                } else {
+                    let path: &str = path.unwrap();
+                    let kstat = args[1] as *mut PosixKstat;
+                    let vaddr = VirtAddr::new(kstat as usize);
+                    match verify_area(vaddr, core::mem::size_of::<PosixKstat>()) {
+                        Ok(_) => Self::stat(path, kstat),
+                        Err(e) => Err(e),
+                    }
+                };
+
+                res
+            }
+
+            // 目前为了适配musl-libc,以下系统调用先这样写着
+            SYS_GET_RANDOM => {
+                let flags = GRandFlags::from_bits(args[2] as u8).ok_or(SystemError::EINVAL)?;
+                Self::get_random(args[0] as *mut u8, args[1], flags)
+            }
+
+            SYS_SOCKET_PAIR => {
+                unimplemented!()
+            }
+
+            SYS_POLL => {
+                kwarn!("SYS_POLL has not yet been implemented");
+                Ok(0)
+            }
+
+            SYS_RT_SIGPROCMASK => {
+                kwarn!("SYS_RT_SIGPROCMASK has not yet been implemented");
+                Ok(0)
+            }
+
+            SYS_TKILL => {
+                kwarn!("SYS_TKILL has not yet been implemented");
+                Ok(0)
+            }
+
+            SYS_SIGALTSTACK => {
+                kwarn!("SYS_SIGALTSTACK has not yet been implemented");
+                Ok(0)
+            }
+
             _ => panic!("Unsupported syscall ID: {}", syscall_num),
         };
         return r;

+ 10 - 2
kernel/src/syscall/user_access.rs

@@ -172,6 +172,10 @@ impl<'a> UserBufferReader<'a> {
         });
     }
 
+    pub fn size(&self) -> usize {
+        return self.buffer.len();
+    }
+
     /// 从用户空间读取数据(到变量中)
     ///
     /// @param offset 字节偏移量
@@ -266,6 +270,10 @@ impl<'a> UserBufferWriter<'a> {
         });
     }
 
+    pub fn size(&self) -> usize {
+        return self.buffer.len();
+    }
+
     /// 从指定地址写入数据到用户空间
     ///
     /// @param data 要写入的数据地址
@@ -274,7 +282,7 @@ impl<'a> UserBufferWriter<'a> {
     ///
     pub fn copy_to_user<T: core::marker::Copy>(
         &'a mut self,
-        src: &'a [T],
+        src: &[T],
         offset: usize,
     ) -> Result<usize, SystemError> {
         let dst = Self::convert_with_offset(self.buffer, offset)?;
@@ -290,7 +298,7 @@ impl<'a> UserBufferWriter<'a> {
     ///
     pub fn copy_one_to_user<T: core::marker::Copy>(
         &'a mut self,
-        src: &'a T,
+        src: &T,
         offset: usize,
     ) -> Result<(), SystemError> {
         let dst = Self::convert_one_with_offset::<T>(self.buffer, offset)?;

+ 19 - 1
kernel/src/time/timer.rs

@@ -72,6 +72,7 @@ impl Timer {
             expire_jiffies,
             timer_func,
             self_ref: Weak::default(),
+            triggered: false,
         })));
 
         result.0.lock().self_ref = Arc::downgrade(&result);
@@ -112,7 +113,9 @@ impl Timer {
 
     #[inline]
     fn run(&self) {
-        let r = self.0.lock().timer_func.run();
+        let mut timer = self.0.lock();
+        timer.triggered = true;
+        let r = timer.timer_func.run();
         if unlikely(r.is_err()) {
             kerror!(
                 "Failed to run timer function: {self:?} {:?}",
@@ -120,6 +123,19 @@ impl Timer {
             );
         }
     }
+
+    /// ## 判断定时器是否已经触发
+    pub fn timeout(&self) -> bool {
+        self.0.lock().triggered
+    }
+
+    /// ## 取消定时器任务
+    pub fn cancel(&self) -> bool {
+        TIMER_LIST
+            .lock()
+            .drain_filter(|x| Arc::<Timer>::as_ptr(&x) == self as *const Timer);
+        true
+    }
 }
 
 /// 定时器类型
@@ -131,6 +147,8 @@ pub struct InnerTimer {
     pub timer_func: Box<dyn TimerFunction>,
     /// self_ref
     self_ref: Weak<Timer>,
+    /// 判断该计时器是否触发
+    triggered: bool,
 }
 
 #[derive(Debug)]