Browse Source

fix: 修复wait4系统调用部分语义与Linux不一致的问题 (#1080)

* fix: 修复wait4系统调用部分语义与Linux不一致的问题

解决 wait4 无法正常阻塞等待(wait 不住),以及 wait 之后卡死的 bug

---------

Signed-off-by: longjin <[email protected]>
LoGin 2 months ago
parent
commit
bcf0382763

+ 1 - 1
kernel/src/driver/tty/tty_job_control.rs

@@ -65,7 +65,7 @@ impl TtyJobCtrlManager {
                 ProcessManager::current_pcb()
                     .flags()
                     .insert(ProcessFlags::HAS_PENDING_SIGNAL);
-
+                log::debug!("job_ctrl_ioctl: kill. pgid: {pgid}, tty_pgid: {tty_pgid:?}");
                 return Err(SystemError::ERESTARTSYS);
             }
         }

+ 34 - 12
kernel/src/filesystem/eventfd.rs

@@ -82,6 +82,21 @@ impl EventFdInode {
         let count = self.eventfd.lock().count;
         return count > 0;
     }
+
+    fn do_poll(
+        &self,
+        _private_data: &FilePrivateData,
+        self_guard: &SpinLockGuard<'_, EventFd>,
+    ) -> Result<usize, SystemError> {
+        let mut events = EPollEventType::empty();
+        if self_guard.count != 0 {
+            events |= EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM;
+        }
+        if self_guard.count != u64::MAX {
+            events |= EPollEventType::EPOLLOUT | EPollEventType::EPOLLWRNORM;
+        }
+        return Ok(events.bits() as usize);
+    }
 }
 
 impl IndexNode for EventFdInode {
@@ -125,6 +140,11 @@ impl IndexNode for EventFdInode {
             }
 
             drop(lock_efd);
+
+            if ProcessManager::current_pcb().has_pending_signal_fast() {
+                return Err(SystemError::ERESTARTSYS);
+            }
+
             let r = wq_wait_event_interruptible!(self.wait_queue, self.readable(), {});
             if r.is_err() {
                 ProcessManager::current_pcb()
@@ -138,7 +158,7 @@ impl IndexNode for EventFdInode {
         }
         let mut val = lock_efd.count;
 
-        let mut eventfd = self.eventfd.lock();
+        let mut eventfd = lock_efd;
         if eventfd.flags.contains(EventFdFlags::EFD_SEMAPHORE) {
             eventfd.count -= 1;
             val = 1;
@@ -147,8 +167,9 @@ impl IndexNode for EventFdInode {
         }
         let val_bytes = val.to_ne_bytes();
         buf[..8].copy_from_slice(&val_bytes);
+        let pollflag = EPollEventType::from_bits_truncate(self.do_poll(&data, &eventfd)? as u32);
+        drop(eventfd);
 
-        let pollflag = EPollEventType::from_bits_truncate(self.poll(&data)? as u32);
         // 唤醒epoll中等待的进程
         EventPoll::wakeup_epoll(&self.epitems, Some(pollflag))?;
 
@@ -178,6 +199,9 @@ impl IndexNode for EventFdInode {
             return Err(SystemError::EINVAL);
         }
         loop {
+            if ProcessManager::current_pcb().has_pending_signal() {
+                return Err(SystemError::ERESTARTSYS);
+            }
             let eventfd = self.eventfd.lock();
             if u64::MAX - eventfd.count > val {
                 break;
@@ -189,13 +213,17 @@ impl IndexNode for EventFdInode {
                 return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
             }
             drop(eventfd);
-            self.wait_queue.sleep();
+            self.wait_queue.sleep().ok();
         }
         let mut eventfd = self.eventfd.lock();
         eventfd.count += val;
+        drop(eventfd);
         self.wait_queue.wakeup_all(None);
 
-        let pollflag = EPollEventType::from_bits_truncate(self.poll(&data)? as u32);
+        let eventfd = self.eventfd.lock();
+        let pollflag = EPollEventType::from_bits_truncate(self.do_poll(&data, &eventfd)? as u32);
+        drop(eventfd);
+
         // 唤醒epoll中等待的进程
         EventPoll::wakeup_epoll(&self.epitems, Some(pollflag))?;
         return Ok(8);
@@ -206,14 +234,8 @@ impl IndexNode for EventFdInode {
     /// - 如果 counter 的值大于 0 ,那么 fd 的状态就是可读的
     /// - 如果能无阻塞地写入一个至少为 1 的值,那么 fd 的状态就是可写的
     fn poll(&self, _private_data: &FilePrivateData) -> Result<usize, SystemError> {
-        let mut events = EPollEventType::empty();
-        if self.eventfd.lock().count != 0 {
-            events |= EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM;
-        }
-        if self.eventfd.lock().count != u64::MAX {
-            events |= EPollEventType::EPOLLOUT | EPollEventType::EPOLLWRNORM;
-        }
-        return Ok(events.bits() as usize);
+        let self_guard = self.eventfd.lock();
+        self.do_poll(_private_data, &self_guard)
     }
 
     fn metadata(&self) -> Result<Metadata, SystemError> {

+ 1 - 1
kernel/src/libs/semaphore.rs

@@ -37,7 +37,7 @@ impl Semaphore {
     fn down(&self) {
         if self.counter.fetch_sub(1, Ordering::Release) <= 0 {
             self.counter.fetch_add(1, Ordering::Relaxed);
-            self.wait_queue.sleep();
+            self.wait_queue.sleep().ok();
             //资源不充足,信号量<=0, 此时进程睡眠
         }
     }

+ 86 - 34
kernel/src/libs/wait_queue.rs

@@ -1,7 +1,7 @@
 // #![allow(dead_code)]
 use core::intrinsics::unlikely;
 
-use alloc::{collections::LinkedList, sync::Arc, vec::Vec};
+use alloc::{collections::VecDeque, sync::Arc, vec::Vec};
 use log::{error, warn};
 use system_error::SystemError;
 
@@ -19,23 +19,40 @@ use super::{
 
 #[derive(Debug)]
 struct InnerWaitQueue {
+    /// 等待队列是否已经死亡, 如果已经死亡, 则不能再添加新的等待进程
+    dead: bool,
     /// 等待队列的链表
-    wait_list: LinkedList<Arc<ProcessControlBlock>>,
+    wait_list: VecDeque<Arc<ProcessControlBlock>>,
 }
 
 /// 被自旋锁保护的等待队列
 #[derive(Debug)]
-pub struct WaitQueue(SpinLock<InnerWaitQueue>);
+pub struct WaitQueue {
+    inner: SpinLock<InnerWaitQueue>,
+}
 
 #[allow(dead_code)]
 impl WaitQueue {
     pub const fn default() -> Self {
-        WaitQueue(SpinLock::new(InnerWaitQueue::INIT))
+        WaitQueue {
+            inner: SpinLock::new(InnerWaitQueue::INIT),
+        }
+    }
+
+    fn inner_irqsave(&self) -> SpinLockGuard<InnerWaitQueue> {
+        self.inner.lock_irqsave()
+    }
+
+    fn inner(&self) -> SpinLockGuard<InnerWaitQueue> {
+        self.inner.lock()
     }
 
     pub fn prepare_to_wait_event(&self, interruptible: bool) -> Result<(), SystemError> {
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock_irqsave();
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
         let pcb = ProcessManager::current_pcb();
+        if !guard.can_sleep() {
+            return Err(SystemError::ESRCH);
+        }
         if Signal::signal_pending_state(interruptible, false, &pcb) {
             return Err(SystemError::ERESTARTSYS);
         } else {
@@ -51,7 +68,7 @@ impl WaitQueue {
     pub fn finish_wait(&self) {
         let pcb = ProcessManager::current_pcb();
         let mut writer = pcb.sched_info().inner_lock_write_irqsave();
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock_irqsave();
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
 
         writer.set_state(ProcessState::Runnable);
         writer.set_wakeup();
@@ -62,34 +79,49 @@ impl WaitQueue {
     }
 
     /// @brief 让当前进程在等待队列上进行等待,并且,允许被信号打断
-    pub fn sleep(&self) {
+    pub fn sleep(&self) -> Result<(), SystemError> {
         before_sleep_check(0);
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock_irqsave();
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
+        if !guard.can_sleep() {
+            return Err(SystemError::ESRCH);
+        }
         ProcessManager::mark_sleep(true).unwrap_or_else(|e| {
             panic!("sleep error: {:?}", e);
         });
         guard.wait_list.push_back(ProcessManager::current_pcb());
         drop(guard);
         schedule(SchedMode::SM_NONE);
+        Ok(())
+    }
+
+    /// 标记等待队列已经死亡,不能再添加新的等待进程
+    pub fn mark_dead(&self) {
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
+        guard.dead = true;
+        drop(guard);
     }
 
     /// @brief 让当前进程在等待队列上进行等待,并且,在释放waitqueue的锁之前,执行f函数闭包
-    pub fn sleep_with_func<F>(&self, f: F)
+    pub fn sleep_with_func<F>(&self, f: F) -> Result<(), SystemError>
     where
         F: FnOnce(),
     {
         before_sleep_check(0);
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock_irqsave();
-        let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
+        if !guard.can_sleep() {
+            return Err(SystemError::ESRCH);
+        }
+
         ProcessManager::mark_sleep(true).unwrap_or_else(|e| {
             panic!("sleep error: {:?}", e);
         });
-        drop(irq_guard);
         guard.wait_list.push_back(ProcessManager::current_pcb());
         f();
 
         drop(guard);
         schedule(SchedMode::SM_NONE);
+
+        Ok(())
     }
 
     /// @brief 让当前进程在等待队列上进行等待. 但是,在释放waitqueue的锁之后,不会调用调度函数。
@@ -106,80 +138,95 @@ impl WaitQueue {
     ///
     /// 由于sleep_without_schedule不会调用调度函数,因此,如果开发者忘记在执行本函数之后,手动调用调度函数,
     /// 由于时钟中断到来或者‘其他cpu kick了当前cpu’,可能会导致一些未定义的行为。
-    pub unsafe fn sleep_without_schedule(&self) {
+    pub unsafe fn sleep_without_schedule(&self) -> Result<(), SystemError> {
         before_sleep_check(1);
         // 安全检查:确保当前处于中断禁止状态
         assert!(!CurrentIrqArch::is_irq_enabled());
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock();
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
+        if !guard.can_sleep() {
+            return Err(SystemError::ESRCH);
+        }
         ProcessManager::mark_sleep(true).unwrap_or_else(|e| {
             panic!("sleep error: {:?}", e);
         });
         guard.wait_list.push_back(ProcessManager::current_pcb());
         drop(guard);
+        Ok(())
     }
 
-    pub unsafe fn sleep_without_schedule_uninterruptible(&self) {
+    pub unsafe fn sleep_without_schedule_uninterruptible(&self) -> Result<(), SystemError> {
         before_sleep_check(1);
         // 安全检查:确保当前处于中断禁止状态
         assert!(!CurrentIrqArch::is_irq_enabled());
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock();
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
+        if !guard.can_sleep() {
+            return Err(SystemError::ESRCH);
+        }
         ProcessManager::mark_sleep(false).unwrap_or_else(|e| {
             panic!("sleep error: {:?}", e);
         });
         guard.wait_list.push_back(ProcessManager::current_pcb());
         drop(guard);
+        Ok(())
     }
     /// @brief 让当前进程在等待队列上进行等待,并且,不允许被信号打断
-    pub fn sleep_uninterruptible(&self) {
+    pub fn sleep_uninterruptible(&self) -> Result<(), SystemError> {
         before_sleep_check(0);
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock();
-        let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
+        if !guard.can_sleep() {
+            return Err(SystemError::ESRCH);
+        }
         ProcessManager::mark_sleep(false).unwrap_or_else(|e| {
             panic!("sleep error: {:?}", e);
         });
-        drop(irq_guard);
         guard.wait_list.push_back(ProcessManager::current_pcb());
         drop(guard);
         schedule(SchedMode::SM_NONE);
+        Ok(())
     }
 
     /// @brief 让当前进程在等待队列上进行等待,并且,允许被信号打断。
     /// 在当前进程的pcb加入队列后,解锁指定的自旋锁。
-    pub fn sleep_unlock_spinlock<T>(&self, to_unlock: SpinLockGuard<T>) {
+    pub fn sleep_unlock_spinlock<T>(&self, to_unlock: SpinLockGuard<T>) -> Result<(), SystemError> {
         before_sleep_check(1);
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock();
-        let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
+        if !guard.can_sleep() {
+            return Err(SystemError::ESRCH);
+        }
         ProcessManager::mark_sleep(true).unwrap_or_else(|e| {
             panic!("sleep error: {:?}", e);
         });
-        drop(irq_guard);
         guard.wait_list.push_back(ProcessManager::current_pcb());
         drop(to_unlock);
         drop(guard);
         schedule(SchedMode::SM_NONE);
+        Ok(())
     }
 
     /// @brief 让当前进程在等待队列上进行等待,并且,允许被信号打断。
     /// 在当前进程的pcb加入队列后,解锁指定的Mutex。
-    pub fn sleep_unlock_mutex<T>(&self, to_unlock: MutexGuard<T>) {
+    pub fn sleep_unlock_mutex<T>(&self, to_unlock: MutexGuard<T>) -> Result<(), SystemError> {
         before_sleep_check(1);
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock();
-        let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
+
+        if !guard.can_sleep() {
+            return Err(SystemError::ESRCH);
+        }
         ProcessManager::mark_sleep(true).unwrap_or_else(|e| {
             panic!("sleep error: {:?}", e);
         });
-        drop(irq_guard);
         guard.wait_list.push_back(ProcessManager::current_pcb());
         drop(to_unlock);
         drop(guard);
         schedule(SchedMode::SM_NONE);
+        Ok(())
     }
 
     /// @brief 让当前进程在等待队列上进行等待,并且,不允许被信号打断。
     /// 在当前进程的pcb加入队列后,解锁指定的自旋锁。
     pub fn sleep_uninterruptible_unlock_spinlock<T>(&self, to_unlock: SpinLockGuard<T>) {
         before_sleep_check(1);
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock();
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
         let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
         ProcessManager::mark_sleep(false).unwrap_or_else(|e| {
             panic!("sleep error: {:?}", e);
@@ -195,7 +242,7 @@ impl WaitQueue {
     /// 在当前进程的pcb加入队列后,解锁指定的Mutex。
     pub fn sleep_uninterruptible_unlock_mutex<T>(&self, to_unlock: MutexGuard<T>) {
         before_sleep_check(1);
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock();
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
         let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
         ProcessManager::mark_sleep(false).unwrap_or_else(|e| {
             panic!("sleep error: {:?}", e);
@@ -217,7 +264,7 @@ impl WaitQueue {
     /// @return true 成功唤醒进程
     /// @return false 没有唤醒进程
     pub fn wakeup(&self, state: Option<ProcessState>) -> bool {
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock_irqsave();
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
         // 如果队列为空,则返回
         if guard.wait_list.is_empty() {
             return false;
@@ -246,7 +293,7 @@ impl WaitQueue {
     ///
     /// @param state 用于判断的state,如果一个进程与这个state相同,或者为None(表示不进行这个判断),则唤醒这个进程。
     pub fn wakeup_all(&self, state: Option<ProcessState>) {
-        let mut guard: SpinLockGuard<InnerWaitQueue> = self.0.lock_irqsave();
+        let mut guard: SpinLockGuard<InnerWaitQueue> = self.inner_irqsave();
         // 如果队列为空,则返回
         if guard.wait_list.is_empty() {
             return;
@@ -281,14 +328,19 @@ impl WaitQueue {
 
     /// @brief 获得当前等待队列的大小
     pub fn len(&self) -> usize {
-        return self.0.lock().wait_list.len();
+        return self.inner_irqsave().wait_list.len();
     }
 }
 
 impl InnerWaitQueue {
     pub const INIT: InnerWaitQueue = InnerWaitQueue {
-        wait_list: LinkedList::new(),
+        wait_list: VecDeque::new(),
+        dead: false,
     };
+
+    pub fn can_sleep(&self) -> bool {
+        return !self.dead;
+    }
 }
 
 fn before_sleep_check(max_preempt: usize) {

+ 10 - 4
kernel/src/net/event_poll/mod.rs

@@ -476,8 +476,8 @@ impl EventPoll {
                 }
 
                 // 如果有未处理的信号则返回错误
-                if current_pcb.sig_info_irqsave().sig_pending().signal().bits() != 0 {
-                    return Err(SystemError::EINTR);
+                if current_pcb.has_pending_signal_fast() {
+                    return Err(SystemError::ERESTARTSYS);
                 }
 
                 // 还未等待到事件发生,则睡眠
@@ -488,12 +488,18 @@ impl EventPoll {
                     let jiffies = next_n_us_timer_jiffies(
                         (timespec.tv_sec * 1000000 + timespec.tv_nsec / 1000) as u64,
                     );
-                    let inner = Timer::new(handle, jiffies);
+                    let inner: Arc<Timer> = Timer::new(handle, jiffies);
                     inner.activate();
                     timer = Some(inner);
                 }
                 let guard = epoll.0.lock_irqsave();
-                unsafe { guard.epoll_wq.sleep_without_schedule() };
+                // 睡眠,等待事件发生
+                // 如果wq已经dead,则直接返回错误
+                unsafe { guard.epoll_wq.sleep_without_schedule() }.inspect_err(|_| {
+                    if let Some(timer) = timer.as_ref() {
+                        timer.cancel();
+                    }
+                })?;
                 drop(guard);
                 schedule(SchedMode::SM_NONE);
                 // 被唤醒后,检查是否有事件可读

+ 63 - 37
kernel/src/process/exit.rs

@@ -5,13 +5,10 @@ use log::warn;
 use system_error::SystemError;
 
 use crate::{
-    arch::{
-        ipc::signal::{SigChildCode, Signal},
-        CurrentIrqArch,
-    },
-    exception::InterruptArch,
+    arch::ipc::signal::{SigChildCode, Signal},
     sched::{schedule, SchedMode},
     syscall::user_access::UserBufferWriter,
+    time::{sleep::nanosleep, Duration},
 };
 
 use super::{
@@ -108,33 +105,37 @@ pub fn kernel_wait4(
 /// 参考 https://code.dragonos.org.cn/xref/linux-6.1.9/kernel/exit.c#1573
 fn do_wait(kwo: &mut KernelWaitOption) -> Result<usize, SystemError> {
     let mut retval: Result<usize, SystemError>;
-    // todo: 在signal struct里面增加等待队列,并在这里初始化子进程退出的回调,使得子进程退出时,能唤醒当前进程。
-
-    loop {
-        kwo.no_task_error = Some(SystemError::ECHILD);
-        let child_pcb = ProcessManager::find(kwo.pid).ok_or(SystemError::ECHILD);
-        if kwo.pid_type != PidType::MAX && child_pcb.is_err() {
+    let mut tmp_child_pcb: Option<Arc<ProcessControlBlock>> = None;
+    macro_rules! notask {
+        ($outer: lifetime) => {
             if let Some(err) = &kwo.no_task_error {
                 retval = Err(err.clone());
             } else {
                 retval = Ok(0);
             }
 
-            if !kwo.options.contains(WaitOption::WNOHANG) {
+            if retval.is_err() && !kwo.options.contains(WaitOption::WNOHANG) {
                 retval = Err(SystemError::ERESTARTSYS);
-                if !ProcessManager::current_pcb()
-                    .sig_info_irqsave()
-                    .sig_pending()
-                    .has_pending()
-                {
+                if !ProcessManager::current_pcb().has_pending_signal_fast() {
+                    schedule(SchedMode::SM_PREEMPT);
                     // todo: 增加子进程退出的回调后,这里可以直接等待在自身的child_wait等待队列上。
                     continue;
                 } else {
-                    break;
+                    break $outer;
                 }
             } else {
-                break;
+                break $outer;
             }
+        };
+    }
+    // todo: 在signal struct里面增加等待队列,并在这里初始化子进程退出的回调,使得子进程退出时,能唤醒当前进程。
+
+    'outer: loop {
+        kwo.no_task_error = Some(SystemError::ECHILD);
+        let child_pcb = ProcessManager::find(kwo.pid).ok_or(SystemError::ECHILD);
+
+        if kwo.pid_type != PidType::MAX && child_pcb.is_err() {
+            notask!('outer);
         }
 
         if kwo.pid_type == PidType::PID {
@@ -143,37 +144,62 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result<usize, SystemError> {
             let child_weak = Arc::downgrade(&child_pcb);
             let r = do_waitpid(child_pcb, kwo);
             if let Some(r) = r {
-                return r;
-            } else {
-                child_weak.upgrade().unwrap().wait_queue.sleep();
+                retval = r;
+                break 'outer;
+            } else if let Err(SystemError::ESRCH) = child_weak.upgrade().unwrap().wait_queue.sleep()
+            {
+                // log::debug!("do_wait: child_pcb sleep failed");
+                continue;
             }
         } else if kwo.pid_type == PidType::MAX {
             // 等待任意子进程
-            // todo: 这里有问题!如果正在for循环的过程中,子进程退出了,可能会导致父进程永远等待。
+            // todo: 这里有问题!应当让当前进程sleep到自身的child_wait等待队列上,这样才高效。(还没实现)
             let current_pcb = ProcessManager::current_pcb();
-            let rd_childen = current_pcb.children.read();
-            let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
-            for pid in rd_childen.iter() {
-                let pcb = ProcessManager::find(*pid).ok_or(SystemError::ECHILD)?;
-                let state = pcb.sched_info().inner_lock_read_irqsave().state();
-                if state.is_exited() {
-                    kwo.ret_status = state.exit_code().unwrap() as i32;
-                    drop(pcb);
-                    unsafe { ProcessManager::release(*pid) };
-                    return Ok((*pid).into());
-                } else {
-                    unsafe { pcb.wait_queue.sleep_without_schedule() };
+            loop {
+                let rd_childen = current_pcb.children.read();
+                if rd_childen.is_empty() {
+                    break;
+                }
+                for pid in rd_childen.iter() {
+                    let pcb = ProcessManager::find(*pid).ok_or(SystemError::ECHILD)?;
+                    let sched_guard = pcb.sched_info().inner_lock_read_irqsave();
+                    let state = sched_guard.state();
+                    if state.is_exited() {
+                        kwo.ret_status = state.exit_code().unwrap() as i32;
+                        kwo.no_task_error = None;
+                        // 由于pcb的drop方法里面要获取父进程的children字段的写锁,所以这里不能直接drop pcb,
+                        // 而是要先break到外层循环,以便释放父进程的children字段的锁,才能drop pcb。
+                        // 否则会死锁。
+                        tmp_child_pcb = Some(pcb.clone());
+                        unsafe { ProcessManager::release(*pid) };
+                        retval = Ok((*pid).into());
+                        break 'outer;
+                    }
                 }
+                nanosleep(Duration::from_millis(100).into())?;
             }
-            drop(irq_guard);
-            schedule(SchedMode::SM_NONE);
         } else {
             // todo: 对于pgid的处理
             warn!("kernel_wait4: currently not support {:?}", kwo.pid_type);
             return Err(SystemError::EINVAL);
         }
+
+        notask!('outer);
     }
 
+    drop(tmp_child_pcb);
+    ProcessManager::current_pcb()
+        .sched_info
+        .inner_lock_write_irqsave()
+        .set_state(ProcessState::Runnable);
+
+    // log::debug!(
+    //     "do_wait, kwo.pid: {}, retval = {:?}, kwo: {:?}",
+    //     kwo.pid,
+    //     retval,
+    //     kwo.no_task_error
+    // );
+
     return retval;
 }
 

+ 55 - 43
kernel/src/process/mod.rs

@@ -388,56 +388,68 @@ impl ProcessManager {
     /// - `exit_code` : 进程的退出码
     pub fn exit(exit_code: usize) -> ! {
         // 关中断
-        let _guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
-        let pcb = ProcessManager::current_pcb();
-        let pid = pcb.pid();
-        pcb.sched_info
-            .inner_lock_write_irqsave()
-            .set_state(ProcessState::Exited(exit_code));
-        pcb.wait_queue.wakeup(Some(ProcessState::Blocked(true)));
-
-        let rq = cpu_rq(smp_get_processor_id().data() as usize);
-        let (rq, guard) = rq.self_lock();
-        rq.deactivate_task(
-            pcb.clone(),
-            DequeueFlag::DEQUEUE_SLEEP | DequeueFlag::DEQUEUE_NOCLOCK,
-        );
-        drop(guard);
+        let _irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
+        let pid: Pid;
+        {
+            let pcb = ProcessManager::current_pcb();
+            pid = pcb.pid();
+            pcb.sched_info
+                .inner_lock_write_irqsave()
+                .set_state(ProcessState::Exited(exit_code));
+            pcb.wait_queue.mark_dead();
+            pcb.wait_queue.wakeup_all(Some(ProcessState::Blocked(true)));
+
+            let rq = cpu_rq(smp_get_processor_id().data() as usize);
+            let (rq, guard) = rq.self_lock();
+            rq.deactivate_task(
+                pcb.clone(),
+                DequeueFlag::DEQUEUE_SLEEP | DequeueFlag::DEQUEUE_NOCLOCK,
+            );
+            drop(guard);
 
-        // 进行进程退出后的工作
-        let thread = pcb.thread.write_irqsave();
-        if let Some(addr) = thread.set_child_tid {
-            unsafe { clear_user(addr, core::mem::size_of::<i32>()).expect("clear tid failed") };
-        }
+            // 进行进程退出后的工作
+            let thread = pcb.thread.write_irqsave();
+            if let Some(addr) = thread.set_child_tid {
+                unsafe { clear_user(addr, core::mem::size_of::<i32>()).expect("clear tid failed") };
+            }
 
-        if let Some(addr) = thread.clear_child_tid {
-            if Arc::strong_count(&pcb.basic().user_vm().expect("User VM Not found")) > 1 {
-                let _ =
-                    Futex::futex_wake(addr, FutexFlag::FLAGS_MATCH_NONE, 1, FUTEX_BITSET_MATCH_ANY);
+            if let Some(addr) = thread.clear_child_tid {
+                if Arc::strong_count(&pcb.basic().user_vm().expect("User VM Not found")) > 1 {
+                    let _ = Futex::futex_wake(
+                        addr,
+                        FutexFlag::FLAGS_MATCH_NONE,
+                        1,
+                        FUTEX_BITSET_MATCH_ANY,
+                    );
+                }
+                unsafe { clear_user(addr, core::mem::size_of::<i32>()).expect("clear tid failed") };
             }
-            unsafe { clear_user(addr, core::mem::size_of::<i32>()).expect("clear tid failed") };
-        }
 
-        RobustListHead::exit_robust_list(pcb.clone());
+            RobustListHead::exit_robust_list(pcb.clone());
 
-        // 如果是vfork出来的进程,则需要处理completion
-        if thread.vfork_done.is_some() {
-            thread.vfork_done.as_ref().unwrap().complete_all();
-        }
-        drop(thread);
-        unsafe { pcb.basic_mut().set_user_vm(None) };
-        pcb.exit_files();
-
-        // TODO 由于未实现进程组,tty记录的前台进程组等于当前进程,故退出前要置空
-        // 后续相关逻辑需要在SYS_EXIT_GROUP系统调用中实现
-        if let Some(tty) = pcb.sig_info_irqsave().tty() {
-            tty.core().contorl_info_irqsave().pgid = None;
+            // 如果是vfork出来的进程,则需要处理completion
+            if thread.vfork_done.is_some() {
+                thread.vfork_done.as_ref().unwrap().complete_all();
+            }
+            drop(thread);
+            unsafe { pcb.basic_mut().set_user_vm(None) };
+            pcb.exit_files();
+
+            // TODO 由于未实现进程组,tty记录的前台进程组等于当前进程,故退出前要置空
+            // 后续相关逻辑需要在SYS_EXIT_GROUP系统调用中实现
+            if let Some(tty) = pcb.sig_info_irqsave().tty() {
+                // 临时解决方案!!! 临时解决方案!!! 引入进程组之后,要重写这个更新前台进程组的逻辑
+                let mut g = tty.core().contorl_info_irqsave();
+                if g.pgid == Some(pid) {
+                    g.pgid = None;
+                }
+            }
+            pcb.sig_info_mut().set_tty(None);
+
+            drop(pcb);
+            ProcessManager::exit_notify();
         }
-        pcb.sig_info_mut().set_tty(None);
 
-        drop(pcb);
-        ProcessManager::exit_notify();
-        // unsafe { CurrentIrqArch::interrupt_enable() };
         __schedule(SchedMode::SM_NONE);
         error!("pid {pid:?} exited but sched again!");
         #[allow(clippy::empty_loop)]

+ 11 - 8
kernel/src/sched/completion.rs

@@ -4,6 +4,7 @@ use system_error::SystemError;
 
 use crate::{
     libs::{spinlock::SpinLock, wait_queue::WaitQueue},
+    process::ProcessManager,
     time::timer::schedule_timeout,
 };
 
@@ -29,20 +30,22 @@ impl Completion {
     /// @return 返回剩余时间或者SystemError
     fn do_wait_for_common(&self, mut timeout: i64, interuptible: bool) -> Result<i64, SystemError> {
         let mut inner = self.inner.lock_irqsave();
-
+        let pcb = ProcessManager::current_pcb();
         if inner.done == 0 {
             //loop break 类似 do while 保证进行一次信号检测
             loop {
                 //检查当前线程是否有未处理的信号
-                //             if (signal_pending_state(state, current)) {
-                // timeout = -ERESTARTSYS;
-                // break;
-                //}
+                if pcb.sig_info_irqsave().sig_pending().has_pending() {
+                    return Err(SystemError::ERESTARTSYS);
+                }
 
-                if interuptible {
-                    unsafe { inner.wait_queue.sleep_without_schedule() };
+                let e = if interuptible {
+                    unsafe { inner.wait_queue.sleep_without_schedule() }
                 } else {
-                    unsafe { inner.wait_queue.sleep_without_schedule_uninterruptible() };
+                    unsafe { inner.wait_queue.sleep_without_schedule_uninterruptible() }
+                };
+                if e.is_err() {
+                    break;
                 }
                 drop(inner);
                 timeout = schedule_timeout(timeout)?;

+ 1 - 1
user/dadk/config/nova_shell-0.1.0.toml

@@ -24,7 +24,7 @@ source = "git"
 source-path = "https://git.mirrors.dragonos.org.cn/DragonOS-Community/NovaShell.git"
 # git标签或分支
 # 注意: branch和revision只能二选一,且source要设置为"git"
-revision = "cb835e03e4"
+revision = "feaebefaef"
 # 构建相关信息
 [build]
 # (可选)构建命令