Sfoglia il codice sorgente

refactor epoll related implementation (#1128)

* Refactor epoll related implementation

Add PollableInode trait
Implement PollableInode for pollable inodes

fix https://github.com/DragonOS-Community/DragonOS/issues/1094

Signed-off-by: Godones <chenlinfeng25@outlook.com>
linfeng 2 giorni fa
parent
commit
167d272792

+ 0 - 1
kernel/Cargo.toml

@@ -14,7 +14,6 @@ members = ["crates/*"]
 
 [features]
 default = ["fatfs", "kvm", "fatfs-secure", "static_keys_test"]
-# 内核栈回溯
 # kvm
 kvm = []
 

+ 10 - 0
kernel/src/driver/tty/tty_core.rs

@@ -490,6 +490,16 @@ impl TtyCoreData {
         self.epitems.lock().push_back(epitem)
     }
 
+    /// Unregisters `epitem` from this tty's epoll item list.
+    ///
+    /// Entries are compared by pointer identity (`Arc::ptr_eq`). Returns
+    /// `Err(SystemError::ENOENT)` when no entry was removed.
+    pub fn remove_epitem(&self, epitem: &Arc<EPollItem>) -> Result<(), SystemError> {
+        let mut items = self.epitems.lock();
+        let before = items.len();
+        items.retain(|item| !Arc::ptr_eq(item, epitem));
+        if items.len() == before {
+            // Nothing matched, so the item was not registered here.
+            Err(SystemError::ENOENT)
+        } else {
+            Ok(())
+        }
+    }
+
     pub fn eptiems(&self) -> &SpinLock<LinkedList<Arc<EPollItem>>> {
         &self.epitems
     }

+ 44 - 39
kernel/src/driver/tty/tty_device.rs

@@ -26,7 +26,10 @@ use crate::{
     filesystem::{
         devfs::{devfs_register, DevFS, DeviceINode},
         kernfs::KernFSInode,
-        vfs::{file::FileMode, syscall::ModeType, FilePrivateData, FileType, IndexNode, Metadata},
+        vfs::{
+            file::FileMode, syscall::ModeType, FilePrivateData, FileType, IndexNode, Metadata,
+            PollableInode,
+        },
     },
     init::initcall::INITCALL_DEVICE,
     libs::{
@@ -34,7 +37,7 @@ use crate::{
         spinlock::SpinLockGuard,
     },
     mm::VirtAddr,
-    net::event_poll::{EPollItem, KernelIoctlData},
+    net::event_poll::EPollItem,
     process::ProcessManager,
     syscall::user_access::{UserBufferReader, UserBufferWriter},
 };
@@ -130,6 +133,43 @@ impl TtyDevice {
     pub fn name_ref(&self) -> &str {
         &self.name
     }
+
+    /// Extracts the `TtyCore` handle stored in a tty file's private data.
+    ///
+    /// Returns `Err(SystemError::EIO)` when `private_data` is not the
+    /// `FilePrivateData::Tty` variant.
+    fn tty_core(private_data: &FilePrivateData) -> Result<Arc<TtyCore>, SystemError> {
+        match private_data {
+            FilePrivateData::Tty(tty_priv) => Ok(tty_priv.tty.clone()),
+            _ => Err(SystemError::EIO),
+        }
+    }
+}
+
+// Poll/epoll support for tty device inodes. Every method resolves the
+// `TtyCore` from the file's private data first and fails with the error from
+// `TtyDevice::tty_core` when that lookup fails.
+impl PollableInode for TtyDevice {
+    /// Delegates to the line discipline's `poll` for the tty behind `private_data`.
+    fn poll(&self, private_data: &FilePrivateData) -> Result<usize, SystemError> {
+        let tty_handle = TtyDevice::tty_core(private_data)?;
+        tty_handle.ldisc().poll(tty_handle)
+    }
+
+    /// Appends `epitem` to the tty core's epoll item list.
+    fn add_epitem(
+        &self,
+        epitem: Arc<EPollItem>,
+        private_data: &FilePrivateData,
+    ) -> Result<(), SystemError> {
+        TtyDevice::tty_core(private_data)?.core().add_epitem(epitem);
+        Ok(())
+    }
+
+    /// Removes `epitem` from the tty core's epoll item list, forwarding the
+    /// result of `TtyCoreData::remove_epitem`.
+    fn remove_epitem(
+        &self,
+        epitem: &Arc<EPollItem>,
+        private_data: &FilePrivateData,
+    ) -> Result<(), SystemError> {
+        let tty_handle = TtyDevice::tty_core(private_data)?;
+        tty_handle.core().remove_epitem(epitem)
+    }
 }
 
 impl IndexNode for TtyDevice {
@@ -312,35 +352,6 @@ impl IndexNode for TtyDevice {
         Ok(())
     }
 
-    fn kernel_ioctl(
-        &self,
-        arg: Arc<dyn KernelIoctlData>,
-        data: &FilePrivateData,
-    ) -> Result<usize, SystemError> {
-        let epitem = arg
-            .arc_any()
-            .downcast::<EPollItem>()
-            .map_err(|_| SystemError::EFAULT)?;
-
-        let _ = UserBufferReader::new(
-            &epitem as *const Arc<EPollItem>,
-            core::mem::size_of::<Arc<EPollItem>>(),
-            false,
-        )?;
-
-        let (tty, _) = if let FilePrivateData::Tty(tty_priv) = data {
-            (tty_priv.tty(), tty_priv.mode)
-        } else {
-            return Err(SystemError::EIO);
-        };
-
-        let core = tty.core();
-
-        core.add_epitem(epitem.clone());
-
-        return Ok(0);
-    }
-
     fn ioctl(&self, cmd: u32, arg: usize, data: &FilePrivateData) -> Result<usize, SystemError> {
         let (tty, _) = if let FilePrivateData::Tty(tty_priv) = data {
             (tty_priv.tty(), tty_priv.mode)
@@ -423,14 +434,8 @@ impl IndexNode for TtyDevice {
         Ok(0)
     }
 
-    fn poll(&self, private_data: &FilePrivateData) -> Result<usize, SystemError> {
-        let (tty, _) = if let FilePrivateData::Tty(tty_priv) = private_data {
-            (tty_priv.tty.clone(), tty_priv.mode)
-        } else {
-            return Err(SystemError::EIO);
-        };
-
-        tty.ldisc().poll(tty)
+    fn as_pollable_inode(&self) -> Result<&dyn PollableInode, SystemError> {
+        Ok(self)
     }
 }
 

+ 39 - 38
kernel/src/filesystem/eventfd.rs

@@ -1,16 +1,16 @@
+use super::vfs::PollableInode;
 use crate::filesystem::vfs::file::{File, FileMode};
 use crate::filesystem::vfs::syscall::ModeType;
 use crate::filesystem::vfs::{FilePrivateData, FileSystem, FileType, IndexNode, Metadata};
 use crate::libs::spinlock::{SpinLock, SpinLockGuard};
 use crate::libs::wait_queue::WaitQueue;
-use crate::net::event_poll::{EPollEventType, EPollItem, EventPoll, KernelIoctlData};
+use crate::net::event_poll::{EPollEventType, EPollItem, EventPoll};
 use crate::process::{ProcessFlags, ProcessManager};
 use crate::sched::SchedMode;
 use crate::syscall::Syscall;
 use alloc::collections::LinkedList;
 use alloc::string::String;
 use alloc::sync::Arc;
-use alloc::sync::Weak;
 use alloc::vec::Vec;
 use core::any::Any;
 use ida::IdAllocator;
@@ -63,21 +63,6 @@ impl EventFdInode {
             epitems: SpinLock::new(LinkedList::new()),
         }
     }
-    pub fn remove_epoll(&self, epoll: &Weak<SpinLock<EventPoll>>) -> Result<(), SystemError> {
-        let is_remove = !self
-            .epitems
-            .lock_irqsave()
-            .extract_if(|x| x.epoll().ptr_eq(epoll))
-            .collect::<Vec<_>>()
-            .is_empty();
-
-        if is_remove {
-            return Ok(());
-        }
-
-        Err(SystemError::ENOENT)
-    }
-
     fn readable(&self) -> bool {
         let count = self.eventfd.lock().count;
         return count > 0;
@@ -99,6 +84,36 @@ impl EventFdInode {
     }
 }
 
+impl PollableInode for EventFdInode {
+    /// Reports the poll status of the eventfd.
+    ///
+    /// Takes the `eventfd` lock and delegates to `do_poll`.
+    fn poll(&self, _private_data: &FilePrivateData) -> Result<usize, SystemError> {
+        let eventfd_guard = self.eventfd.lock();
+        self.do_poll(_private_data, &eventfd_guard)
+    }
+
+    /// Appends `epitem` to this eventfd's epoll item list.
+    fn add_epitem(
+        &self,
+        epitem: Arc<EPollItem>,
+        _private_data: &FilePrivateData,
+    ) -> Result<(), SystemError> {
+        let mut items = self.epitems.lock();
+        items.push_back(epitem);
+        Ok(())
+    }
+
+    /// Removes `epitem` (matched by pointer identity) from the epoll item
+    /// list; returns `Err(SystemError::ENOENT)` when nothing was removed.
+    fn remove_epitem(
+        &self,
+        epitem: &Arc<EPollItem>,
+        _private_data: &FilePrivateData,
+    ) -> Result<(), SystemError> {
+        let mut items = self.epitems.lock();
+        let before = items.len();
+        items.retain(|item| !Arc::ptr_eq(item, epitem));
+        if items.len() == before {
+            Err(SystemError::ENOENT)
+        } else {
+            Ok(())
+        }
+    }
+}
+
 impl IndexNode for EventFdInode {
     fn open(
         &self,
@@ -229,15 +244,6 @@ impl IndexNode for EventFdInode {
         return Ok(8);
     }
 
-    /// # 检查 eventfd 的状态
-    ///
-    /// - 如果 counter 的值大于 0 ,那么 fd 的状态就是可读的
-    /// - 如果能无阻塞地写入一个至少为 1 的值,那么 fd 的状态就是可写的
-    fn poll(&self, _private_data: &FilePrivateData) -> Result<usize, SystemError> {
-        let self_guard = self.eventfd.lock();
-        self.do_poll(_private_data, &self_guard)
-    }
-
     fn metadata(&self) -> Result<Metadata, SystemError> {
         let meta = Metadata {
             mode: ModeType::from_bits_truncate(0o755),
@@ -250,27 +256,22 @@ impl IndexNode for EventFdInode {
     fn resize(&self, _len: usize) -> Result<(), SystemError> {
         Ok(())
     }
-    fn kernel_ioctl(
-        &self,
-        arg: Arc<dyn KernelIoctlData>,
-        _data: &FilePrivateData,
-    ) -> Result<usize, SystemError> {
-        let epitem = arg
-            .arc_any()
-            .downcast::<EPollItem>()
-            .map_err(|_| SystemError::EFAULT)?;
-        self.epitems.lock().push_back(epitem);
-        Ok(0)
-    }
+
     fn fs(&self) -> Arc<dyn FileSystem> {
         panic!("EventFd does not have a filesystem")
     }
+
     fn as_any_ref(&self) -> &dyn Any {
         self
     }
+
     fn list(&self) -> Result<Vec<String>, SystemError> {
         Err(SystemError::EINVAL)
     }
+
+    fn as_pollable_inode(&self) -> Result<&dyn PollableInode, SystemError> {
+        Ok(self)
+    }
 }
 
 impl Syscall {

+ 18 - 63
kernel/src/filesystem/vfs/file.rs

@@ -1,28 +1,19 @@
 use core::sync::atomic::{AtomicUsize, Ordering};
 
-use alloc::{
-    string::String,
-    sync::{Arc, Weak},
-    vec::Vec,
-};
+use alloc::{string::String, sync::Arc, vec::Vec};
 use log::error;
 use system_error::SystemError;
 
 use super::{Dirent, FileType, IndexNode, InodeId, Metadata, SpecialNodeData};
-use crate::filesystem::eventfd::EventFdInode;
-use crate::perf::PerfEventInode;
 use crate::{
     driver::{
         base::{block::SeekFrom, device::DevicePrivateData},
         tty::tty_device::TtyFilePrivateData,
     },
     filesystem::procfs::ProcfsFilePrivateData,
-    ipc::pipe::{LockedPipeInode, PipeFsPrivateData},
+    ipc::pipe::PipeFsPrivateData,
     libs::{rwlock::RwLock, spinlock::SpinLock},
-    net::{
-        event_poll::{EPollItem, EPollPrivateData, EventPoll},
-        socket::SocketInode,
-    },
+    net::event_poll::{EPollItem, EPollPrivateData},
     process::{cred::Cred, ProcessManager},
 };
 
@@ -492,62 +483,26 @@ impl File {
         return Ok(());
     }
 
-    /// ## 向该文件添加一个EPollItem对象
-    ///
-    /// 在文件状态发生变化时,需要向epoll通知
-    pub fn add_epoll(&self, epitem: Arc<EPollItem>) -> Result<(), SystemError> {
-        match self.file_type {
-            FileType::Socket => {
-                let inode = self.inode.downcast_ref::<SocketInode>().unwrap();
-                let mut socket = inode.inner();
-
-                return socket.add_epoll(epitem);
-            }
-            FileType::Pipe => {
-                let inode = self.inode.downcast_ref::<LockedPipeInode>().unwrap();
-                return inode.add_epoll(epitem);
-            }
-            _ => {
-                let r = self.inode.kernel_ioctl(epitem, &self.private_data.lock());
-                if r.is_err() {
-                    return Err(SystemError::ENOSYS);
-                }
-
-                Ok(())
-            }
-        }
+    /// Registers `epitem` with the inode behind this file.
+    ///
+    /// The file's private data is locked for the duration of the call. Any
+    /// error from `as_pollable_inode` (the inode is not pollable) is returned
+    /// unchanged.
+    pub fn add_epitem(&self, epitem: Arc<EPollItem>) -> Result<(), SystemError> {
+        let guard = self.private_data.lock();
+        let pollable = self.inode.as_pollable_inode()?;
+        pollable.add_epitem(epitem, &guard)
+    }
 
-    /// ## 删除一个绑定的epoll
-    pub fn remove_epoll(&self, epoll: &Weak<SpinLock<EventPoll>>) -> Result<(), SystemError> {
-        match self.file_type {
-            FileType::Socket => {
-                let inode = self.inode.downcast_ref::<SocketInode>().unwrap();
-                let mut socket = inode.inner();
-
-                socket.remove_epoll(epoll)
-            }
-            FileType::Pipe => {
-                let inode = self.inode.downcast_ref::<LockedPipeInode>().unwrap();
-                inode.remove_epoll(epoll)
-            }
-            _ => {
-                let inode = self.inode.downcast_ref::<EventFdInode>();
-                if let Some(inode) = inode {
-                    return inode.remove_epoll(epoll);
-                }
-
-                let inode = self
-                    .inode
-                    .downcast_ref::<PerfEventInode>()
-                    .ok_or(SystemError::ENOSYS)?;
-                return inode.remove_epoll(epoll);
-            }
-        }
+    /// Unregisters the given `epitem` from the inode behind this file.
+    ///
+    /// The file's private data is locked for the duration of the call. Any
+    /// error from `as_pollable_inode` (the inode is not pollable) is returned
+    /// unchanged.
+    pub fn remove_epitem(&self, epitem: &Arc<EPollItem>) -> Result<(), SystemError> {
+        let guard = self.private_data.lock();
+        let pollable = self.inode.as_pollable_inode()?;
+        pollable.remove_epitem(epitem, &guard)
+    }
 
+    /// Poll the file for events
     pub fn poll(&self) -> Result<usize, SystemError> {
-        self.inode.poll(&self.private_data.lock())
+        let private_data = self.private_data.lock();
+        self.inode.as_pollable_inode()?.poll(&private_data)
     }
 }
 

+ 26 - 16
kernel/src/filesystem/vfs/mod.rs

@@ -21,6 +21,7 @@ use crate::{
         spinlock::{SpinLock, SpinLockGuard},
     },
     mm::{fault::PageFaultMessage, VmFaultReason},
+    net::event_poll::EPollItem,
     time::PosixTimeSpec,
 };
 
@@ -121,6 +122,24 @@ bitflags! {
     }
 }
 
+/// Capability trait for inodes that can be polled and watched through epoll.
+///
+/// An inode advertises this capability through `IndexNode::as_pollable_inode`.
+pub trait PollableInode: Any + Sync + Send + Debug + CastFromSync {
+    /// Returns the current poll status of the inode.
+    fn poll(&self, private_data: &FilePrivateData) -> Result<usize, SystemError>;
+
+    /// Registers `epitem` with the inode.
+    fn add_epitem(
+        &self,
+        epitem: Arc<EPollItem>,
+        private_data: &FilePrivateData,
+    ) -> Result<(), SystemError>;
+
+    /// Unregisters the given `epitem` from the inode.
+    fn remove_epitem(
+        &self,
+        epitem: &Arc<EPollItem>,
+        private_data: &FilePrivateData,
+    ) -> Result<(), SystemError>;
+}
+
 pub trait IndexNode: Any + Sync + Send + Debug + CastFromSync {
     fn mmap(&self, _start: usize, _len: usize, _offset: usize) -> Result<(), SystemError> {
         return Err(SystemError::ENOSYS);
@@ -236,14 +255,6 @@ pub trait IndexNode: Any + Sync + Send + Debug + CastFromSync {
         return Err(SystemError::ENOSYS);
     }
 
-    /// @brief 获取当前inode的状态。
-    ///
-    /// @return PollStatus结构体
-    fn poll(&self, _private_data: &FilePrivateData) -> Result<usize, SystemError> {
-        // 若文件系统没有实现此方法,则返回“不支持”
-        return Err(SystemError::ENOSYS);
-    }
-
     /// @brief 获取inode的元数据
     ///
     /// @return 成功:Ok(inode的元数据)
@@ -411,14 +422,6 @@ pub trait IndexNode: Any + Sync + Send + Debug + CastFromSync {
         return Err(SystemError::ENOSYS);
     }
 
-    fn kernel_ioctl(
-        &self,
-        _arg: Arc<dyn crate::net::event_poll::KernelIoctlData>,
-        _data: &FilePrivateData,
-    ) -> Result<usize, SystemError> {
-        return Err(SystemError::ENOSYS);
-    }
-
     /// @brief 获取inode所在的文件系统的指针
     fn fs(&self) -> Arc<dyn FileSystem>;
 
@@ -625,6 +628,13 @@ pub trait IndexNode: Any + Sync + Send + Debug + CastFromSync {
         );
         None
     }
+
+    /// Transform the inode to a pollable inode
+    ///
+    /// If the inode is not pollable, return an error
+    fn as_pollable_inode(&self) -> Result<&dyn PollableInode, SystemError> {
+        Err(SystemError::ENOSYS)
+    }
 }
 
 impl DowncastArc for dyn IndexNode {

+ 5 - 15
kernel/src/filesystem/vfs/mount.rs

@@ -25,7 +25,7 @@ use crate::{
 
 use super::{
     file::FileMode, syscall::ModeType, utils::DName, FilePrivateData, FileSystem, FileType,
-    IndexNode, InodeId, Magic, SuperBlock,
+    IndexNode, InodeId, Magic, PollableInode, SuperBlock,
 };
 
 const MOUNTFS_BLOCK_SIZE: u64 = 512;
@@ -435,15 +435,6 @@ impl IndexNode for MountFSInode {
         return self.inner_inode.ioctl(cmd, data, private_data);
     }
 
-    #[inline]
-    fn kernel_ioctl(
-        &self,
-        arg: Arc<dyn crate::net::event_poll::KernelIoctlData>,
-        data: &FilePrivateData,
-    ) -> Result<usize, SystemError> {
-        return self.inner_inode.kernel_ioctl(arg, data);
-    }
-
     #[inline]
     fn list(&self) -> Result<alloc::vec::Vec<alloc::string::String>, SystemError> {
         return self.inner_inode.list();
@@ -528,11 +519,6 @@ impl IndexNode for MountFSInode {
         self.inner_inode.special_node()
     }
 
-    #[inline]
-    fn poll(&self, private_data: &FilePrivateData) -> Result<usize, SystemError> {
-        self.inner_inode.poll(private_data)
-    }
-
     /// 若不支持,则调用第二种情况来从父目录获取文件名
     /// # Performance
     /// 应尽可能引入DName,
@@ -553,6 +539,10 @@ impl IndexNode for MountFSInode {
     fn page_cache(&self) -> Option<Arc<PageCache>> {
         self.inner_inode.page_cache()
     }
+
+    fn as_pollable_inode(&self) -> Result<&dyn PollableInode, SystemError> {
+        self.inner_inode.as_pollable_inode()
+    }
 }
 
 impl FileSystem for MountFS {

+ 23 - 15
kernel/src/ipc/pipe.rs

@@ -4,7 +4,7 @@ use crate::{
     arch::ipc::signal::{SigCode, Signal},
     filesystem::vfs::{
         core::generate_inode_id, file::FileMode, syscall::ModeType, FilePrivateData, FileSystem,
-        FileType, IndexNode, Metadata,
+        FileType, IndexNode, Metadata, PollableInode,
     },
     libs::{
         spinlock::{SpinLock, SpinLockGuard},
@@ -19,7 +19,6 @@ use crate::{
 use alloc::{
     collections::LinkedList,
     sync::{Arc, Weak},
-    vec::Vec,
 };
 use system_error::SystemError;
 
@@ -165,24 +164,33 @@ impl LockedPipeInode {
         let inode = self.inner.lock();
         return !inode.buf_full() || inode.reader == 0;
     }
+}
+
+impl PollableInode for LockedPipeInode {
+    fn poll(&self, private_data: &FilePrivateData) -> Result<usize, SystemError> {
+        self.inner.lock().poll(private_data)
+    }
 
-    pub fn add_epoll(&self, epitem: Arc<EPollItem>) -> Result<(), SystemError> {
+    fn add_epitem(
+        &self,
+        epitem: Arc<EPollItem>,
+        _private_data: &FilePrivateData,
+    ) -> Result<(), SystemError> {
         self.epitems.lock().push_back(epitem);
         Ok(())
     }
 
-    pub fn remove_epoll(&self, epoll: &Weak<SpinLock<EventPoll>>) -> Result<(), SystemError> {
-        let is_remove = !self
-            .epitems
-            .lock_irqsave()
-            .extract_if(|x| x.epoll().ptr_eq(epoll))
-            .collect::<Vec<_>>()
-            .is_empty();
-
-        if is_remove {
+    fn remove_epitem(
+        &self,
+        epitem: &Arc<EPollItem>,
+        _private_data: &FilePrivateData,
+    ) -> Result<(), SystemError> {
+        let mut guard = self.epitems.lock();
+        let len = guard.len();
+        guard.retain(|x| !Arc::ptr_eq(x, epitem));
+        if len != guard.len() {
             return Ok(());
         }
-
         Err(SystemError::ENOENT)
     }
 }
@@ -496,7 +504,7 @@ impl IndexNode for LockedPipeInode {
         return Err(SystemError::ENOSYS);
     }
 
-    fn poll(&self, private_data: &FilePrivateData) -> Result<usize, SystemError> {
-        return self.inner.lock().poll(private_data);
+    fn as_pollable_inode(&self) -> Result<&dyn PollableInode, SystemError> {
+        Ok(self)
     }
 }

+ 25 - 28
kernel/src/net/event_poll/mod.rs

@@ -1,5 +1,4 @@
 use core::{
-    any::Any,
     fmt::Debug,
     sync::atomic::{AtomicBool, Ordering},
 };
@@ -9,7 +8,6 @@ use alloc::{
     sync::{Arc, Weak},
     vec::Vec,
 };
-use intertrait::CastFromSync;
 use system_error::SystemError;
 
 use crate::{
@@ -110,10 +108,6 @@ impl EPollItem {
     }
 }
 
-pub trait KernelIoctlData: Send + Sync + Any + Debug + CastFromSync {}
-
-impl KernelIoctlData for EPollItem {}
-
 /// ### Epoll文件的私有信息
 #[derive(Debug, Clone)]
 pub struct EPollPrivateData {
@@ -153,11 +147,6 @@ impl IndexNode for EPollInode {
         Err(SystemError::ENOSYS)
     }
 
-    fn poll(&self, _private_data: &FilePrivateData) -> Result<usize, SystemError> {
-        // 需要实现epoll嵌套epoll时,需要实现这里
-        todo!()
-    }
-
     fn fs(&self) -> Arc<dyn crate::filesystem::vfs::FileSystem> {
         todo!()
     }
@@ -221,9 +210,8 @@ impl EventPoll {
                 .get_file_by_fd(fd);
 
             if let Some(file) = file {
-                if let Some(self_ref) = self.self_ref.as_ref() {
-                    file.remove_epoll(self_ref)?;
-                }
+                let epitm = self.ep_items.get(&fd).unwrap();
+                file.remove_epitem(epitm)?;
             }
 
             self.ep_items.remove(&fd);
@@ -352,7 +340,7 @@ impl EventPoll {
                 }
             }
 
-            let ep_item = epoll_guard.ep_items.get(&dstfd);
+            let ep_item = epoll_guard.ep_items.get(&dstfd).cloned();
             match op {
                 EPollCtlOption::Add => {
                     // 如果已经存在,则返回错误
@@ -369,12 +357,16 @@ impl EventPoll {
                     Self::ep_insert(&mut epoll_guard, dst_file, epitem)?;
                 }
                 EPollCtlOption::Del => {
-                    // 不存在则返回错误
-                    if ep_item.is_none() {
-                        return Err(SystemError::ENOENT);
+                    match ep_item {
+                        Some(ref ep_item) => {
+                            // 删除
+                            Self::ep_remove(&mut epoll_guard, dstfd, Some(dst_file), ep_item)?;
+                        }
+                        None => {
+                            // 不存在则返回错误
+                            return Err(SystemError::ENOENT);
+                        }
                     }
-                    // 删除
-                    Self::ep_remove(&mut epoll_guard, dstfd, Some(dst_file))?;
                 }
                 EPollCtlOption::Mod => {
                     // 不存在则返回错误
@@ -700,7 +692,7 @@ impl EventPoll {
             return Err(SystemError::ENOSYS);
         }
 
-        dst_file.add_epoll(epitem.clone())?;
+        dst_file.add_epitem(epitem.clone())?;
         Ok(())
     }
 
@@ -708,9 +700,10 @@ impl EventPoll {
         epoll: &mut SpinLockGuard<EventPoll>,
         fd: i32,
         dst_file: Option<Arc<File>>,
+        epitem: &Arc<EPollItem>,
     ) -> Result<(), SystemError> {
         if let Some(dst_file) = dst_file {
-            dst_file.remove_epoll(epoll.self_ref.as_ref().unwrap())?;
+            dst_file.remove_epitem(epitem)?;
         }
 
         if let Some(epitem) = epoll.ep_items.remove(&fd) {
@@ -787,13 +780,17 @@ impl EventPoll {
         let mut epitems_guard = epitems.try_lock_irqsave()?;
         // 一次只取一个,因为一次也只有一个进程能拿到对应文件的🔓
         if let Some(epitem) = epitems_guard.pop_front() {
-            let pollflags = pollflags.unwrap_or({
-                if let Some(file) = epitem.file.upgrade() {
-                    EPollEventType::from_bits_truncate(file.poll()? as u32)
-                } else {
-                    EPollEventType::empty()
+            let pollflags = match pollflags {
+                Some(flags) => flags,
+                None => {
+                    if let Some(file) = epitem.file.upgrade() {
+                        // warning: deadlock will happen if poll() is called when pollflags is None
+                        EPollEventType::from_bits_truncate(file.poll()? as u32)
+                    } else {
+                        EPollEventType::empty()
+                    }
                 }
-            });
+            };
 
             if let Some(epoll) = epitem.epoll().upgrade() {
                 let mut epoll_guard = epoll.try_lock()?;

+ 39 - 38
kernel/src/net/socket/mod.rs

@@ -19,7 +19,7 @@ use crate::{
     arch::rand::rand,
     filesystem::vfs::{
         file::FileMode, syscall::ModeType, FilePrivateData, FileSystem, FileType, IndexNode,
-        Metadata,
+        Metadata, PollableInode,
     },
     libs::{
         rwlock::{RwLock, RwLockWriteGuard},
@@ -37,7 +37,7 @@ use self::{
 };
 
 use super::{
-    event_poll::{EPollEventType, EPollItem, EventPoll},
+    event_poll::{EPollEventType, EPollItem},
     Endpoint, Protocol, ShutdownType,
 };
 
@@ -242,29 +242,15 @@ pub trait Socket: Sync + Send + Debug + Any {
 
     fn as_any_mut(&mut self) -> &mut dyn Any;
 
-    fn add_epoll(&mut self, epitem: Arc<EPollItem>) -> Result<(), SystemError> {
+    fn add_epitem(&mut self, epitem: Arc<EPollItem>) -> Result<(), SystemError> {
         let posix_item = self.posix_item();
-        posix_item.add_epoll(epitem);
+        posix_item.add_epitem(epitem);
         Ok(())
     }
 
-    fn remove_epoll(&mut self, epoll: &Weak<SpinLock<EventPoll>>) -> Result<(), SystemError> {
+    fn remove_epitm(&mut self, epitem: &Arc<EPollItem>) -> Result<(), SystemError> {
         let posix_item = self.posix_item();
-        posix_item.remove_epoll(epoll)?;
-
-        Ok(())
-    }
-
-    fn clear_epoll(&mut self) -> Result<(), SystemError> {
-        let posix_item = self.posix_item();
-
-        for epitem in posix_item.epitems.lock_irqsave().iter() {
-            let epoll = epitem.epoll();
-
-            if let Some(epoll) = epoll.upgrade() {
-                EventPoll::ep_remove(&mut epoll.lock_irqsave(), epitem.fd(), None)?;
-            }
-        }
+        posix_item.remove_epitem(epitem)?;
 
         Ok(())
     }
@@ -312,8 +298,6 @@ impl SocketInode {
                 PORT_MANAGER.unbind_port(socket.metadata().socket_type, ip.port);
             }
 
-            socket.clear_epoll()?;
-
             HANDLE_MAP
                 .write_irqsave()
                 .remove(&socket.socket_handle())
@@ -333,6 +317,29 @@ impl Drop for SocketInode {
     }
 }
 
+impl PollableInode for SocketInode {
+    /// Polls the wrapped socket and returns its event bits as a `usize`.
+    fn poll(&self, _private_data: &FilePrivateData) -> Result<usize, SystemError> {
+        // The socket lock is released at the end of this statement.
+        let ready = self.0.lock_irqsave().poll();
+        Ok(ready.bits() as usize)
+    }
+
+    /// Forwards `epitem` to the wrapped socket's `add_epitem`.
+    fn add_epitem(
+        &self,
+        epitem: Arc<EPollItem>,
+        _private_data: &FilePrivateData,
+    ) -> Result<(), SystemError> {
+        let mut socket = self.0.lock_irqsave();
+        socket.add_epitem(epitem)
+    }
+
+    /// Forwards `epitem` to the wrapped socket's `remove_epitm`.
+    fn remove_epitem(
+        &self,
+        epitem: &Arc<EPollItem>,
+        _private_data: &FilePrivateData,
+    ) -> Result<(), SystemError> {
+        let mut socket = self.0.lock_irqsave();
+        socket.remove_epitm(epitem)
+    }
+}
+
 impl IndexNode for SocketInode {
     fn open(
         &self,
@@ -369,11 +376,6 @@ impl IndexNode for SocketInode {
         self.0.lock_no_preempt().write(&buf[0..len], None)
     }
 
-    fn poll(&self, _private_data: &FilePrivateData) -> Result<usize, SystemError> {
-        let events = self.0.lock_irqsave().poll();
-        return Ok(events.bits() as usize);
-    }
-
     fn fs(&self) -> Arc<dyn FileSystem> {
         todo!()
     }
@@ -399,6 +401,10 @@ impl IndexNode for SocketInode {
     fn resize(&self, _len: usize) -> Result<(), SystemError> {
         return Ok(());
     }
+
+    fn as_pollable_inode(&self) -> Result<&dyn PollableInode, SystemError> {
+        Ok(self)
+    }
 }
 
 #[derive(Debug)]
@@ -426,22 +432,17 @@ impl PosixSocketHandleItem {
         schedule(SchedMode::SM_NONE);
     }
 
-    pub fn add_epoll(&self, epitem: Arc<EPollItem>) {
+    pub fn add_epitem(&self, epitem: Arc<EPollItem>) {
         self.epitems.lock_irqsave().push_back(epitem)
     }
 
-    pub fn remove_epoll(&self, epoll: &Weak<SpinLock<EventPoll>>) -> Result<(), SystemError> {
-        let is_remove = !self
-            .epitems
-            .lock_irqsave()
-            .extract_if(|x| x.epoll().ptr_eq(epoll))
-            .collect::<Vec<_>>()
-            .is_empty();
-
-        if is_remove {
+    /// Unregisters `epitem` from this socket handle's epoll item list.
+    ///
+    /// The list is locked with `lock_irqsave`, matching `add_epitem`, so the
+    /// same spinlock is not taken both with and without interrupts saved.
+    /// Returns `Err(SystemError::ENOENT)` when no entry was removed.
+    pub fn remove_epitem(&self, epitem: &Arc<EPollItem>) -> Result<(), SystemError> {
+        let mut guard = self.epitems.lock_irqsave();
+        let len = guard.len();
+        // Entries are matched by pointer identity, not by value.
+        guard.retain(|x| !Arc::ptr_eq(x, epitem));
+        if len != guard.len() {
             return Ok(());
         }
-
         Err(SystemError::ENOENT)
     }
 

+ 35 - 35
kernel/src/perf/mod.rs

@@ -6,7 +6,7 @@ use crate::filesystem::page_cache::PageCache;
 use crate::filesystem::vfs::file::{File, FileMode};
 use crate::filesystem::vfs::syscall::ModeType;
 use crate::filesystem::vfs::{
-    FilePrivateData, FileSystem, FileType, FsInfo, IndexNode, Metadata, SuperBlock,
+    FilePrivateData, FileSystem, FileType, FsInfo, IndexNode, Metadata, PollableInode, SuperBlock,
 };
 use crate::include::bindings::linux_bpf::{
     perf_event_attr, perf_event_sample_format, perf_sw_ids, perf_type_id,
@@ -15,7 +15,7 @@ use crate::libs::casting::DowncastArc;
 use crate::libs::spinlock::{SpinLock, SpinLockGuard};
 use crate::mm::fault::{PageFaultHandler, PageFaultMessage};
 use crate::mm::VmFaultReason;
-use crate::net::event_poll::{EPollEventType, EPollItem, EventPoll, KernelIoctlData};
+use crate::net::event_poll::{EPollEventType, EPollItem, EventPoll};
 use crate::perf::bpf::BpfPerfEvent;
 use crate::perf::util::{PerfEventIoc, PerfEventOpenFlags, PerfProbeArgs};
 use crate::process::ProcessManager;
@@ -24,7 +24,7 @@ use crate::syscall::Syscall;
 use alloc::boxed::Box;
 use alloc::collections::LinkedList;
 use alloc::string::String;
-use alloc::sync::{Arc, Weak};
+use alloc::sync::Arc;
 use alloc::vec::Vec;
 use core::any::Any;
 use core::ffi::c_void;
@@ -67,21 +67,6 @@ impl PerfEventInode {
             epitems: SpinLock::new(LinkedList::new()),
         }
     }
-    pub fn remove_epoll(
-        &self,
-        epoll: &Weak<SpinLock<EventPoll>>,
-    ) -> core::result::Result<(), SystemError> {
-        let is_remove = !self
-            .epitems
-            .lock_irqsave()
-            .extract_if(|x| x.epoll().ptr_eq(epoll))
-            .collect::<Vec<_>>()
-            .is_empty();
-        if is_remove {
-            return Ok(());
-        }
-        Err(SystemError::ENOENT)
-    }
     fn do_poll(&self) -> Result<usize> {
         let mut events = EPollEventType::empty();
         if self.event.readable() {
@@ -134,10 +119,6 @@ impl IndexNode for PerfEventInode {
         panic!("write_at not implemented for PerfEvent");
     }
 
-    fn poll(&self, _private_data: &FilePrivateData) -> Result<usize> {
-        self.do_poll()
-    }
-
     fn metadata(&self) -> Result<Metadata> {
         let meta = Metadata {
             mode: ModeType::from_bits_truncate(0o755),
@@ -177,32 +158,51 @@ impl IndexNode for PerfEventInode {
         }
     }
 
-    fn kernel_ioctl(
-        &self,
-        arg: Arc<dyn KernelIoctlData>,
-        _data: &FilePrivateData,
-    ) -> core::result::Result<usize, SystemError> {
-        let epitem = arg
-            .arc_any()
-            .downcast::<EPollItem>()
-            .map_err(|_| SystemError::EFAULT)?;
-        self.epitems.lock().push_back(epitem);
-        Ok(0)
-    }
-
     fn fs(&self) -> Arc<dyn FileSystem> {
         // panic!("PerfEvent does not have a filesystem")
         Arc::new(PerfFakeFs)
     }
+
     fn as_any_ref(&self) -> &dyn Any {
         self
     }
+
     fn list(&self) -> Result<Vec<String>> {
         Err(SystemError::ENOSYS)
     }
+
     fn page_cache(&self) -> Option<Arc<PageCache>> {
         self.event.page_cache()
     }
+
+    fn as_pollable_inode(&self) -> Result<&dyn PollableInode> {
+        Ok(self)
+    }
+}
+
+impl PollableInode for PerfEventInode {
+    /// Reports the poll status of the perf event by delegating to `do_poll`.
+    fn poll(&self, _private_data: &FilePrivateData) -> Result<usize> {
+        self.do_poll()
+    }
+
+    /// Appends `epitem` to this perf event's epoll item list.
+    fn add_epitem(&self, epitem: Arc<EPollItem>, _private_data: &FilePrivateData) -> Result<()> {
+        let mut items = self.epitems.lock();
+        items.push_back(epitem);
+        Ok(())
+    }
+
+    /// Removes `epitem` (matched by pointer identity) from the epoll item
+    /// list; returns `Err(SystemError::ENOENT)` when nothing was removed.
+    fn remove_epitem(
+        &self,
+        epitem: &Arc<EPollItem>,
+        _private_data: &FilePrivateData,
+    ) -> Result<()> {
+        let mut items = self.epitems.lock();
+        let before = items.len();
+        items.retain(|item| !Arc::ptr_eq(item, epitem));
+        if items.len() == before {
+            Err(SystemError::ENOENT)
+        } else {
+            Ok(())
+        }
+    }
 }
 
 #[derive(Debug)]

+ 1 - 1
user/apps/syscall_ebpf/syscall_ebpf-ebpf/rust-toolchain.toml

@@ -1,3 +1,3 @@
 [toolchain]
-channel = "nightly"
+channel = "nightly-2024-11-05"
 components = ["rust-src"]

+ 1 - 0
user/apps/test_epoll/.gitignore

@@ -0,0 +1 @@
+test_epoll

+ 20 - 0
user/apps/test_epoll/Makefile

@@ -0,0 +1,20 @@
+# Select the musl cross toolchain prefix that matches the target architecture.
+ifeq ($(ARCH), x86_64)
+	CROSS_COMPILE=x86_64-linux-musl-
+else ifeq ($(ARCH), riscv64)
+	CROSS_COMPILE=riscv64-linux-musl-
+endif
+
+CC=$(CROSS_COMPILE)gcc
+
+# Build the statically linked test binary directly from main.c.
+.PHONY: all
+all: main.c
+	$(CC) -static -o test_epoll main.c
+
+.PHONY: install clean fmt
+# Move the binary into the DADK build directory.
+install: all
+	mv test_epoll $(DADK_CURRENT_BUILD_DIR)/test_epoll
+
+# -f: the binary may already have been moved by `install`, and no object
+# files are produced by `all`, so missing files must not fail the target.
+clean:
+	rm -f test_epoll *.o
+
+fmt:

+ 129 - 0
user/apps/test_epoll/main.c

@@ -0,0 +1,129 @@
+#include <errno.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/epoll.h>
+#include <sys/eventfd.h>
+#include <unistd.h>
+
+#define MAX_EVENTS 10
+
+static int efd;  // eventfd 描述符
+static int efd2; // eventfd 描述符
+
+// Worker thread: waits 2 seconds, then writes an event notification to efd.
+// Exits the whole process with EXIT_FAILURE when the write is short or fails.
+void *worker_thread(void *arg) {
+  uint64_t increment = 1;
+  ssize_t written;
+
+  printf("工作线程:等待2秒后发送事件通知...\n");
+  sleep(2); // simulate a time-consuming task
+  printf("工作线程:发送事件通知...\n");
+  written = write(efd, &increment, sizeof(increment));
+  if ((size_t)written != sizeof(increment)) {
+    perror("工作线程写入 eventfd 出错");
+    exit(EXIT_FAILURE);
+  }
+  printf("工作线程:事件通知已发送\n");
+  return NULL;
+}
+
+// Test driver: registers an eventfd and a dup()'ed copy of it with one epoll
+// instance, waits for the worker thread's notification, then closes the epoll
+// instance and both descriptors. Returns 0 on success; every failure path
+// prints a message and exits with EXIT_FAILURE.
+int main(void) {
+  int epoll_fd;
+  struct epoll_event ev, events[MAX_EVENTS];
+  int nfds;
+  pthread_t tid;
+
+  // Create the eventfd with an initial counter of 0.
+  efd = eventfd(0, 0);
+  if (efd == -1) {
+    perror("创建 eventfd 失败");
+    exit(EXIT_FAILURE);
+  } else {
+    printf("创建 eventfd 成功,描述符 = %d\n", efd);
+  }
+
+  // Duplicate the eventfd descriptor.
+  efd2 = dup(efd);
+  if (efd2 == -1) {
+    perror("复制 eventfd 失败");
+    close(efd);
+    exit(EXIT_FAILURE);
+  } else {
+    printf("复制 eventfd 成功,描述符 = %d\n", efd2);
+  }
+
+  // Create the epoll instance.
+  epoll_fd = epoll_create1(0);
+  if (epoll_fd == -1) {
+    perror("创建 epoll 实例失败");
+    close(efd);
+    close(efd2);
+    exit(EXIT_FAILURE);
+  }
+
+  // Register the eventfd with epoll, watching for readability.
+  ev.events = EPOLLIN;
+  ev.data.fd = efd;
+  if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, efd, &ev) == -1) {
+    perror("epoll_ctl 添加 eventfd 失败");
+    close(efd);
+    close(efd2);
+    close(epoll_fd);
+    exit(EXIT_FAILURE);
+  }
+
+  // Register the duplicated descriptor as well, watching for readability.
+  ev.data.fd = efd2;
+  if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, efd2, &ev) == -1) {
+    perror("epoll_ctl 添加复制的 eventfd 失败");
+    close(efd);
+    close(efd2);
+    close(epoll_fd);
+    exit(EXIT_FAILURE);
+  }
+
+  // Start the worker thread that will trigger the event.
+  if (pthread_create(&tid, NULL, worker_thread, NULL) != 0) {
+    perror("创建工作线程失败");
+    close(efd);
+    close(efd2);
+    close(epoll_fd);
+    exit(EXIT_FAILURE);
+  }
+
+  printf("主线程:使用 epoll_wait 等待事件...\n");
+
+  // Block until at least one event is reported.
+  nfds = epoll_wait(epoll_fd, events, MAX_EVENTS, -1);
+  if (nfds == -1) {
+    perror("epoll_wait 失败");
+    exit(EXIT_FAILURE);
+  } else {
+    printf("主线程:epoll_wait 返回,事件数量 = %d\n", nfds);
+  }
+
+  // The ready events are not read back here, so the eventfd counter is still
+  // pending when the descriptors are closed below.
+  // NOTE(review): presumably intentional, to exercise epoll teardown with
+  // items still registered - confirm.
+
+  // Wait for the worker thread to finish.
+  pthread_join(tid, NULL);
+
+  int r = close(epoll_fd);
+  if (r == -1) {
+    perror("关闭 epoll 实例失败");
+    exit(EXIT_FAILURE);
+  } else {
+    printf("关闭 epoll 实例成功\n");
+  }
+  close(efd);
+  close(efd2); // close the duplicated eventfd descriptor
+  printf("test_epoll ok\n");
+  return 0;
+}

+ 46 - 0
user/dadk/config/test_epoll.toml

@@ -0,0 +1,46 @@
+# 用户程序名称
+name = "test_epoll"
+# 版本号
+version = "0.1.0"
+# 用户程序描述信息
+description = "test_epoll"
+# (可选)默认: false 是否只构建一次,如果为true,DADK会在构建成功后,将构建结果缓存起来,下次构建时,直接使用缓存的构建结果
+build-once = false
+#  (可选) 默认: false 是否只安装一次,如果为true,DADK会在安装成功后,不再重复安装
+install-once = false
+# 目标架构
+# 可选值:"x86_64", "aarch64", "riscv64"
+target-arch = ["x86_64"]
+# 任务源
+[task-source]
+# 构建类型
+# 可选值:"build-from-source", "install-from-prebuilt"
+type = "build-from-source"
+# 构建来源
+# "build-from-source" 可选值:"git", "local", "archive"
+# "install-from-prebuilt" 可选值:"local", "archive"
+source = "local"
+# 路径或URL
+source-path = "user/apps/test_epoll"
+# 构建相关信息
+[build]
+# (可选)构建命令
+build-command = "make install"
+# 安装相关信息
+[install]
+# (可选)安装到DragonOS的路径
+in-dragonos-path = "/bin"
+# 清除相关信息
+[clean]
+# (可选)清除命令
+clean-command = "make clean"
+# (可选)依赖项
+# 注意:如果没有依赖项,忽略此项,不允许只留一个[[depends]]
+# [[depends]]
+# name = "depend1"
+# version = "0.1.1"
+# (可选)环境变量
+# 注意:如果没有环境变量,忽略此项,不允许只留一个[[envs]]
+# [[envs]]
+# key = "PATH"
+# value = "/usr/bin"