Browse Source

feat: mntns支持unshare (#1262)

Signed-off-by: longjin <longjin@DragonOS.org>
LoGin 1 day ago
parent
commit
86929880ad

+ 11 - 2
kernel/src/filesystem/procfs/mod.rs

@@ -14,7 +14,11 @@ use system_error::SystemError;
 use crate::{
     arch::mm::LockedFrameAllocator,
     driver::base::device::device_number::DeviceNumber,
-    filesystem::vfs::{mount::MountFlags, vcore::generate_inode_id, FileType},
+    filesystem::vfs::{
+        mount::{MountFlags, MountPath},
+        vcore::generate_inode_id,
+        FileType,
+    },
     libs::{
         once::Once,
         rwlock::RwLock,
@@ -1195,11 +1199,16 @@ pub fn procfs_init() -> Result<(), SystemError> {
         let procfs: Arc<ProcFS> = ProcFS::new();
         let root_inode = ProcessManager::current_mntns().root_inode();
         // procfs 挂载
-        root_inode
+        let mntfs = root_inode
             .mkdir("proc", ModeType::from_bits_truncate(0o755))
             .expect("Unabled to find /proc")
             .mount(procfs, MountFlags::empty())
             .expect("Failed to mount at /proc");
+        let ino = root_inode.metadata().unwrap().inode_id;
+        let mount_path = Arc::new(MountPath::from("/proc"));
+        ProcessManager::current_mntns()
+            .add_mount(Some(ino), mount_path, mntfs)
+            .expect("Failed to add mount for /proc");
         info!("ProcFS mounted.");
         result = Some(Ok(()));
     });

+ 135 - 21
kernel/src/filesystem/vfs/mount.rs

@@ -1,6 +1,7 @@
 use core::{
     any::Any,
     fmt::Debug,
+    hash::Hash,
     sync::atomic::{compiler_fence, Ordering},
 };
 
@@ -113,6 +114,10 @@ bitflags! {
             MountFlags::MANDLOCK.bits() |
             MountFlags::I_VERSION.bits() |
             MountFlags::LAZYTIME.bits();
+
+        /// Old magic mount flag and mask
+        const MGC_VAL = 0xC0ED0000; // Magic value for mount flags
+        const MGC_MASK = 0xFFFF0000; // Mask for magic mount flags
     }
 }
 
@@ -243,6 +248,20 @@ impl Debug for MountFS {
     }
 }
 
+/// Two `MountFS` values compare equal only when their self-references point at
+/// the same allocation, i.e. when they are the same mount instance.
+impl PartialEq for MountFS {
+    fn eq(&self, rhs: &Self) -> bool {
+        let lhs_arc = self.self_ref();
+        let rhs_arc = rhs.self_ref();
+        Arc::ptr_eq(&lhs_arc, &rhs_arc)
+    }
+}
+
+/// Hashes a mount by its `mount_id` alone; no other field takes part.
+impl Hash for MountFS {
+    fn hash<H: core::hash::Hasher>(&self, hasher: &mut H) {
+        Hash::hash(&self.mount_id, hasher);
+    }
+}
+
+// Marker impl: `PartialEq` for `MountFS` compares allocation identity
+// (`Arc::ptr_eq`), so the relation is a full equivalence.
+impl Eq for MountFS {}
+
 /// @brief MountFS的Index Node 注意,这个IndexNode只是一个中间层。它的目的是将具体文件系统的Inode与挂载机制连接在一起。
 #[derive(Debug)]
 #[cast_to([sync] IndexNode)]
@@ -281,10 +300,41 @@ impl MountFS {
         result
     }
 
+    /// Creates a new `MountFS` that wraps the same inner filesystem as `self`.
+    ///
+    /// The copy receives a freshly allocated `MountId`, an empty mount-point table
+    /// and a fresh `Lazy` namespace slot. The inner filesystem handle, the
+    /// propagation handle and the mount flags are cloned/copied from `self`.
+    /// Child mounts of `self` are NOT carried over.
+    ///
+    /// # Arguments
+    /// * `self_mountpoint` - inode the copy is mounted on (`copy_mnt_ns` passes
+    ///   `None` for the root mount)
+    pub fn deepcopy(&self, self_mountpoint: Option<Arc<MountFSInode>>) -> Arc<Self> {
+        Arc::new_cyclic(|weak_self| MountFS {
+            mount_id: MountId::alloc(),
+            mount_flags: self.mount_flags,
+            inner_filesystem: self.inner_filesystem.clone(),
+            propagation: self.propagation.clone(),
+            namespace: Lazy::new(),
+            mountpoints: SpinLock::new(BTreeMap::new()),
+            self_mountpoint: RwLock::new(self_mountpoint),
+            self_ref: weak_self.clone(),
+        })
+    }
+
+    /// Returns the flags stored in this mount's `mount_flags` field.
     pub fn mount_flags(&self) -> MountFlags {
         self.mount_flags
     }
 
+    /// Registers `mount_fs` as the filesystem mounted on the inode `inode_id`
+    /// of this mount.
+    ///
+    /// # Errors
+    /// Returns `SystemError::EEXIST` if something is already mounted on
+    /// `inode_id`; the existing entry is left untouched.
+    pub fn add_mount(&self, inode_id: InodeId, mount_fs: Arc<MountFS>) -> Result<(), SystemError> {
+        // Take the lock once so that the existence check and the insertion form
+        // one critical section. Locking twice would let another CPU insert the
+        // same key in between and have its entry silently overwritten.
+        let mut mountpoints = self.mountpoints.lock();
+        if mountpoints.contains_key(&inode_id) {
+            return Err(SystemError::EEXIST);
+        }
+        mountpoints.insert(inode_id, mount_fs);
+
+        Ok(())
+    }
+
+    /// Locks the mount-point table (inode id -> mounted `MountFS`) and returns the guard.
+    ///
+    /// The lock stays held for as long as the returned guard is alive; `add_mount`
+    /// on the same `MountFS` takes the same lock, so do not call it while holding
+    /// this guard.
+    pub fn mountpoints(&self) -> SpinLockGuard<BTreeMap<InodeId, Arc<MountFS>>> {
+        self.mountpoints.lock()
+    }
+
+    /// Returns a clone of this mount's `Arc<MountPropagation>` handle.
     pub fn propagation(&self) -> Arc<MountPropagation> {
         self.propagation.clone()
     }
@@ -507,6 +557,14 @@ impl MountFSInode {
 
         Ok(absolute_path)
     }
+
+    /// Builds a new `MountFSInode` that wraps the same inner inode as `self`
+    /// but is owned by `mount_fs` instead of `self.mount_fs`.
+    pub fn clone_with_new_mount_fs(&self, mount_fs: Arc<MountFS>) -> Arc<MountFSInode> {
+        let inner_inode = self.inner_inode.clone();
+        Arc::new_cyclic(|weak_self| MountFSInode {
+            self_ref: weak_self.clone(),
+            inner_inode,
+            mount_fs,
+        })
+    }
 }
 
 impl IndexNode for MountFSInode {
@@ -737,16 +795,16 @@ impl IndexNode for MountFSInode {
             Some(&ProcessManager::current_mntns()),
             mount_flags,
         );
-
         self.mount_fs
-            .mountpoints
-            .lock()
-            .insert(metadata.inode_id, new_mount_fs.clone());
+            .add_mount(metadata.inode_id, new_mount_fs.clone())?;
 
-        // todo: 这里也许不应该存储路径到MountList,而是应该存储inode的引用。因为同一个inner inode的路径在不同的mntns中可能是不一样的。
         let mount_path = self.absolute_path();
         let mount_path = Arc::new(MountPath::from(mount_path?));
-        ProcessManager::current_mntns().add_mount(mount_path, new_mount_fs.clone())?;
+        ProcessManager::current_mntns().add_mount(
+            Some(metadata.inode_id),
+            mount_path,
+            new_mount_fs.clone(),
+        )?;
 
         return Ok(new_mount_fs);
     }
@@ -762,14 +820,28 @@ impl IndexNode for MountFSInode {
         // debug!("from {:?}, to {:?}", from, self);
         let new_mount_fs = from.umount()?;
         self.mount_fs
-            .mountpoints
-            .lock()
-            .insert(metadata.inode_id, new_mount_fs.clone());
+            .add_mount(metadata.inode_id, new_mount_fs.clone())?;
         // 更新当前挂载点的self_mountpoint
         new_mount_fs
             .self_mountpoint
             .write()
             .replace(self.self_ref.upgrade().unwrap());
+        let mntns = ProcessManager::current_mntns();
+
+        let mount_path = mntns
+            .mount_list()
+            .get_mount_path_by_mountfs(&new_mount_fs)
+            .unwrap_or_else(|| {
+                panic!(
+                    "MountFS::mount_from: failed to get mount path for {:?}",
+                    self.mount_fs.name()
+                );
+            });
+
+        mntns.mount_list().remove(mount_path.as_str());
+        ProcessManager::current_mntns()
+            .add_mount(Some(metadata.inode_id), mount_path, new_mount_fs.clone())
+            .expect("MountFS::mount_from: failed to add mount.");
         return Ok(new_mount_fs);
     }
 
@@ -858,7 +930,7 @@ impl FileSystem for MountFS {
     }
 
     fn name(&self) -> &str {
-        "mountfs"
+        // Report the name of the wrapped filesystem rather than a fixed "mountfs".
+        self.inner_filesystem.name()
     }
     fn super_block(&self) -> SuperBlock {
         SuperBlock::new(Magic::MOUNT_MAGIC, MOUNTFS_BLOCK_SIZE, MOUNTFS_MAX_NAMELEN)
@@ -937,7 +1009,13 @@ impl MountPath {
 
 // Keeps a record of the mount points so that filesystem-specific lookups are possible.
 pub struct MountList {
-    mounts: RwLock<HashMap<Arc<MountPath>, Arc<MountFS>>>,
+    /// All lookup tables, guarded together by a single read-write lock.
+    inner: RwLock<InnerMountList>,
+}
+
+/// The lookup tables behind a `MountList`.
+struct InnerMountList {
+    /// Mount path -> filesystem mounted at that path.
+    mounts: HashMap<Arc<MountPath>, Arc<MountFS>>,
+    /// Mounted filesystem -> inode id that was passed to `MountList::insert` with it.
+    mfs2ino: HashMap<Arc<MountFS>, InodeId>,
+    /// Inode id passed to `MountList::insert` -> mount path.
+    ino2mp: HashMap<InodeId, Arc<MountPath>>,
+}
 
 impl MountList {
@@ -950,7 +1028,11 @@ impl MountList {
     /// - `MountList`: 新的挂载点列表实例
     pub fn new() -> Arc<Self> {
         Arc::new(MountList {
-            mounts: RwLock::new(HashMap::new()),
+            // Start with all three lookup tables empty.
+            inner: RwLock::new(InnerMountList {
+                mounts: HashMap::new(),
+                ino2mp: HashMap::new(),
+                mfs2ino: HashMap::new(),
+            }),
         })
     }
 
@@ -963,11 +1045,18 @@ impl MountList {
     /// This function is thread-safe as it uses a RwLock to ensure safe concurrent access.
     ///
     /// # Arguments
+    /// * `ino` - An optional InodeId representing the inode of the `fs` mounted at.
     /// * `path` - The mount path where the filesystem will be mounted
     /// * `fs` - The filesystem instance to be mounted at the specified path
-    #[inline]
-    pub fn insert(&self, path: Arc<MountPath>, fs: Arc<MountFS>) {
-        self.mounts.write().insert(path, fs);
+    #[inline(never)]
+    pub fn insert(&self, ino: Option<InodeId>, path: Arc<MountPath>, fs: Arc<MountFS>) {
+        let mut inner = self.inner.write();
+        // NOTE(review): if `path` was already present, the previous filesystem is
+        // replaced in `mounts`, but its `mfs2ino` / `ino2mp` entries are not removed.
+        inner.mounts.insert(path.clone(), fs.clone());
+        // If this is not the root mount point, also record the inode -> mount path
+        // and filesystem -> inode mappings.
+        if let Some(ino) = ino {
+            inner.ino2mp.insert(ino, path.clone());
+            inner.mfs2ino.insert(fs, ino);
+        }
     }
 
     /// # get_mount_point - 获取挂载点的路径
@@ -983,14 +1072,15 @@ impl MountList {
     /// - `Option<(String, String, Arc<MountFS>)>`:
     ///   - `Some((mount_point, rest_path, fs))`: 如果找到了匹配的挂载点,返回一个包含挂载点路径、剩余路径和挂载文件系统的元组。
     ///   - `None`: 如果没有找到匹配的挂载点,返回 None。
-    #[inline]
+    #[inline(never)]
     #[allow(dead_code)]
     pub fn get_mount_point<T: AsRef<str>>(
         &self,
         path: T,
     ) -> Option<(Arc<MountPath>, String, Arc<MountFS>)> {
-        self.mounts
+        self.inner
             .upgradeable_read()
+            .mounts
             .iter()
             .filter_map(|(key, fs)| {
                 let strkey = key.as_str();
@@ -1015,20 +1105,44 @@ impl MountList {
     /// ## 返回值
     ///
     /// - `Option<Arc<MountFS>>`: 返回一个 `Arc<MountFS>` 类型的可选值,表示被移除的挂载点,如果挂载点不存在则返回 `None`。
-    #[inline]
+    #[inline(never)]
     pub fn remove<T: Into<MountPath>>(&self, path: T) -> Option<Arc<MountFS>> {
-        self.mounts.write().remove(&path.into())
+        let mut inner = self.inner.write();
+        let path: MountPath = path.into();
+        // Remove the mount registered for the given path from the mount list.
+        if let Some(fs) = inner.mounts.remove(&path) {
+            // Drop the reverse-lookup entries as well; they exist only when the
+            // filesystem was inserted together with an inode id.
+            if let Some(ino) = inner.mfs2ino.remove(&fs) {
+                inner.ino2mp.remove(&ino);
+            }
+            return Some(fs);
+        }
+        None
     }
 
     /// # clone_inner - clone the internal mount-point list
+    ///
+    /// Only the path -> filesystem map is returned; the inode lookup tables are not.
     pub fn clone_inner(&self) -> HashMap<Arc<MountPath>, Arc<MountFS>> {
-        self.mounts.read().clone()
+        self.inner.read().mounts.clone()
+    }
+
+    /// Returns the mount path that was inserted together with the inode id `ino`,
+    /// or `None` if no mount was recorded for that inode.
+    #[inline(never)]
+    pub fn get_mount_path_by_ino(&self, ino: InodeId) -> Option<Arc<MountPath>> {
+        self.inner.read().ino2mp.get(&ino).cloned()
+    }
+
+    /// Looks up the mount path recorded for `mountfs`.
+    ///
+    /// The filesystem is first resolved to the inode id it was inserted with, and
+    /// that inode id is then resolved to a path. Returns `None` when either step
+    /// has no entry (for example when the filesystem was inserted without an
+    /// inode id).
+    #[inline(never)]
+    pub fn get_mount_path_by_mountfs(&self, mountfs: &Arc<MountFS>) -> Option<Arc<MountPath>> {
+        let tables = self.inner.read();
+        let ino = tables.mfs2ino.get(mountfs)?;
+        tables.ino2mp.get(ino).cloned()
+    }
 }
 
 impl Debug for MountList {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        f.debug_map().entries(self.mounts.read().iter()).finish()
+        // Only the path -> filesystem map is printed; the read guard is bound to a
+        // local so that it outlives the iterator borrowed from it.
+        let inner = self.inner.read();
+        f.debug_map().entries(inner.mounts.iter()).finish()
     }
 }
 

+ 178 - 35
kernel/src/filesystem/vfs/syscall/sys_mount.rs

@@ -3,7 +3,7 @@
 use crate::{
     arch::{interrupt::TrapFrame, syscall::nr::SYS_MOUNT},
     filesystem::vfs::{
-        fcntl::AtFlags, mount::MountFlags, produce_fs, utils::user_path_at, FileSystem, MountFS,
+        fcntl::AtFlags, mount::MountFlags, produce_fs, utils::user_path_at, IndexNode, MountFS,
         MAX_PATHLEN, VFS_MAX_FOLLOW_SYMLINK_TIMES,
     },
     process::ProcessManager,
@@ -12,6 +12,7 @@ use crate::{
         user_access,
     },
 };
+use alloc::string::String;
 use alloc::sync::Arc;
 use alloc::vec::Vec;
 use system_error::SystemError;
@@ -39,28 +40,33 @@ impl Syscall for SysMountHandle {
     }
 
     fn handle(&self, args: &[usize], _frame: &mut TrapFrame) -> Result<usize, SystemError> {
+        // Fetch the raw syscall arguments. Each pointer argument is `None` when
+        // userspace passed NULL.
+        let source = Self::source(args);
         let target = Self::target(args);
         let filesystemtype = Self::filesystemtype(args);
         let data = Self::raw_data(args);
-        let source = Self::source(args);
         let mount_flags = Self::mountflags(args);
-
+        log::debug!(
+            "sys_mount: source: {:?}, target: {:?}, filesystemtype: {:?}, mount_flags: {:?}, data: {:?}",
+            source, target, filesystemtype, mount_flags, data
+        );
+        // Bits that do not correspond to a declared `MountFlags` constant are dropped.
         let mount_flags = MountFlags::from_bits_truncate(mount_flags);
 
-        let target = user_access::check_and_clone_cstr(target, Some(MAX_PATHLEN))?
-            .into_string()
-            .map_err(|_| SystemError::EINVAL)?;
-        let source = user_access::check_and_clone_cstr(source, Some(MAX_PATHLEN))?
-            .into_string()
-            .map_err(|_| SystemError::EINVAL)?;
-        let source = source.as_str();
+        // Copy every user string into kernel memory; a `None` pointer stays `None`.
+        // `copy_mount_string` already logs a failed user read, so that failure is
+        // logged twice.
+        let target = copy_mount_string(target).inspect_err(|e| {
+            log::error!("Failed to read mount target: {:?}", e);
+        })?;
+        let source = copy_mount_string(source).inspect_err(|e| {
+            log::error!("Failed to read mount source: {:?}", e);
+        })?;
 
-        let fstype_str = user_access::check_and_clone_cstr(filesystemtype, Some(MAX_PATHLEN))?;
-        let fstype_str = fstype_str.to_str().map_err(|_| SystemError::EINVAL)?;
+        let data = copy_mount_string(data).inspect_err(|e| {
+            log::error!("Failed to read mount data: {:?}", e);
+        })?;
 
-        let fs = produce_fs(fstype_str, data, source)?;
+        let fstype_str = copy_mount_string(filesystemtype).inspect_err(|e| {
+            log::error!("Failed to read filesystem type: {:?}", e);
+        })?;
 
-        do_mount(fs, &target, mount_flags)?;
+        do_mount(source, target, fstype_str, data, mount_flags)?;
 
         return Ok(0);
     }
@@ -69,11 +75,11 @@ impl Syscall for SysMountHandle {
         let flags = MountFlags::from_bits(Self::mountflags(args)).unwrap_or(MountFlags::empty());
 
         vec![
-            FormattedSyscallParam::new("source", format!("{:#x}", Self::source(args) as usize)),
-            FormattedSyscallParam::new("target", format!("{:#x}", Self::target(args) as usize)),
+            FormattedSyscallParam::new("source", format!("{:?}", Self::source(args))),
+            FormattedSyscallParam::new("target", format!("{:?}", Self::target(args))),
             FormattedSyscallParam::new(
                 "filesystem type",
-                format!("{:#x}", Self::filesystemtype(args) as usize),
+                format!("{:?}", Self::filesystemtype(args)),
             ),
             FormattedSyscallParam::new(
                 "mountflags",
@@ -85,28 +91,40 @@ impl Syscall for SysMountHandle {
 }
 
 impl SysMountHandle {
-    fn source(args: &[usize]) -> *const u8 {
-        args[0] as *const u8
+    /// Argument 0: user pointer to the mount source string, `None` if NULL.
+    fn source(args: &[usize]) -> Option<*const u8> {
+        let source = args[0] as *const u8;
+        if source.is_null() {
+            None
+        } else {
+            Some(source)
+        }
     }
-    fn target(args: &[usize]) -> *const u8 {
-        args[1] as *const u8
+    /// Argument 1: user pointer to the mount target path, `None` if NULL.
+    fn target(args: &[usize]) -> Option<*const u8> {
+        let target = args[1] as *const u8;
+        if target.is_null() {
+            None
+        } else {
+            Some(target)
+        }
     }
-    fn filesystemtype(args: &[usize]) -> *const u8 {
-        args[2] as *const u8
+    /// Argument 2: user pointer to the filesystem type string, `None` if NULL.
+    fn filesystemtype(args: &[usize]) -> Option<*const u8> {
+        let p = args[2] as *const u8;
+        if p.is_null() {
+            None
+        } else {
+            Some(p)
+        }
     }
+    /// Argument 3: the mount flags; only the low 32 bits of the argument are kept.
     fn mountflags(args: &[usize]) -> u32 {
         args[3] as u32
     }
-    fn raw_data(args: &[usize]) -> Option<&'static str> {
+    /// Argument 4: user pointer to the filesystem-specific data, `None` if NULL.
+    /// The pointer is returned as-is and is not dereferenced here.
+    fn raw_data(args: &[usize]) -> Option<*const u8> {
         let raw = args[4] as *const u8;
         if raw.is_null() {
             return None;
         }
-        let len = (0..).find(|&i| unsafe { raw.add(i).read() } == 0).unwrap();
 
-        let slice = unsafe { core::slice::from_raw_parts(raw, len) };
-        let raw_str = core::str::from_utf8(slice).ok().unwrap();
-        Some(raw_str)
+        Some(raw)
     }
 }
 
@@ -129,22 +147,147 @@ syscall_table_macros::declare_syscall!(SYS_MOUNT, SysMountHandle);
 /// - `Ok(())`: 挂载请求处理成功。
 /// - `Err(SystemError)`: 挂载失败时返回错误。
 pub fn do_mount(
-    fs: Arc<dyn FileSystem>,
-    mount_point: &str,
+    source: Option<String>,
+    target: Option<String>,
+    filesystemtype: Option<String>,
+    data: Option<String>,
     mount_flags: MountFlags,
-) -> Result<Arc<MountFS>, SystemError> {
+) -> Result<(), SystemError> {
+    // Resolve the target path relative to the current process' working directory.
     let (current_node, rest_path) = user_path_at(
         &ProcessManager::current_pcb(),
         AtFlags::AT_FDCWD.bits(),
-        mount_point,
+        // NOTE(review): a missing target is looked up as the empty path instead of
+        // being rejected up front; confirm this is the intended behaviour.
+        target.as_deref().unwrap_or(""),
     )?;
     let inode = current_node.lookup_follow_symlink(&rest_path, VFS_MAX_FOLLOW_SYMLINK_TIMES)?;
-    let result = ProcessManager::current_mntns().get_mount_point(mount_point);
+    // Flag interpretation and the actual mount happen in `path_mount`.
+    return path_mount(source, inode, filesystemtype, data, mount_flags);
+}
+
+/// Interprets the mount flags and dispatches to the matching mount operation.
+///
+/// Only the creation of a brand-new mount is implemented. Remount, bind and move
+/// requests fail with `ENOSYS`. A propagation-type change (`SHARED`, `PRIVATE`,
+/// `SLAVE`, `UNBINDABLE`) is logged and reported as success without doing anything.
+///
+/// # Errors
+/// - `EINVAL` if `NOUSER` is set
+/// - `ENOSYS` for the unimplemented operations listed above
+/// - any error returned by `do_new_mount`
+fn path_mount(
+    source: Option<String>,
+    target_inode: Arc<dyn IndexNode>,
+    filesystemtype: Option<String>,
+    data: Option<String>,
+    mut flags: MountFlags,
+) -> Result<(), SystemError> {
+    // Discard the old magic value if it is present in the upper half of the flags.
+    if flags.intersection(MountFlags::MGC_MASK) == MountFlags::MGC_VAL {
+        flags.remove(MountFlags::MGC_MASK);
+    }
+
+    if flags.contains(MountFlags::NOUSER) {
+        return Err(SystemError::EINVAL);
+    }
+
+    // Default to relatime unless overridden
+    let mut mnt_flags = if flags.contains(MountFlags::NOATIME) {
+        MountFlags::empty()
+    } else {
+        MountFlags::RELATIME
+    };
+
+    // Flags that are carried over unchanged whenever the caller requested them.
+    let pass_through = [
+        MountFlags::NOSUID,
+        MountFlags::NODEV,
+        MountFlags::NOEXEC,
+        MountFlags::NOATIME,
+        MountFlags::NODIRATIME,
+        MountFlags::RDONLY,
+        MountFlags::NOSYMFOLLOW,
+    ];
+    for flag in pass_through.iter() {
+        if flags.contains(*flag) {
+            mnt_flags.insert(*flag);
+        }
+    }
+
+    // strictatime wins over relatime and noatime, so this has to run after the
+    // atime flags above have been applied.
+    if flags.contains(MountFlags::STRICTATIME) {
+        mnt_flags.remove(MountFlags::RELATIME | MountFlags::NOATIME);
+    }
+
+    // todo: handle the atime-related options on remount
+    // https://code.dragonos.org.cn/xref/linux-6.6.21/fs/namespace.c#3646
+
+    // todo: handle each mount option the way linux does
+    // https://code.dragonos.org.cn/xref/linux-6.6.21/fs/namespace.c#3662
+    let wants_remount = flags.contains(MountFlags::REMOUNT);
+    let wants_bind = flags.contains(MountFlags::BIND);
+
+    if wants_remount && wants_bind {
+        log::warn!("todo: reconfigure mnt");
+        return Err(SystemError::ENOSYS);
+    }
+
+    if wants_remount {
+        log::warn!("todo: remount");
+        return Err(SystemError::ENOSYS);
+    }
+
+    if wants_bind {
+        log::warn!("todo: bind mnt");
+        return Err(SystemError::ENOSYS);
+    }
+
+    let propagation_types =
+        MountFlags::SHARED | MountFlags::PRIVATE | MountFlags::SLAVE | MountFlags::UNBINDABLE;
+    if flags.intersects(propagation_types) {
+        log::warn!("todo: change mnt type: {:?}", flags);
+        // Report success for now (instead of ENOSYS), otherwise unshare would fail!
+        return Ok(());
+    }
+
+    if flags.contains(MountFlags::MOVE) {
+        log::warn!("todo: move mnt");
+        return Err(SystemError::ENOSYS);
+    }
+
+    // Create the new mount.
+    do_new_mount(source, target_inode, filesystemtype, data, mnt_flags)?;
+    Ok(())
+}
+
+/// Creates a filesystem of type `filesystemtype` and mounts it on `target_inode`.
+///
+/// # Errors
+/// - `EINVAL` if `filesystemtype` or `source` is `None`
+/// - `EBUSY` if the target's absolute path is already a mount point in the
+///   current mount namespace
+/// - any error from `produce_fs`, `absolute_path` or `IndexNode::mount`
+fn do_new_mount(
+    source: Option<String>,
+    target_inode: Arc<dyn IndexNode>,
+    filesystemtype: Option<String>,
+    data: Option<String>,
+    mount_flags: MountFlags,
+) -> Result<Arc<MountFS>, SystemError> {
+    let fs_type_str = filesystemtype.ok_or(SystemError::EINVAL)?;
+    // NOTE(review): a missing source is always rejected here; confirm whether
+    // filesystems that need no source device should be allowed to pass `None`.
+    let source = source.ok_or(SystemError::EINVAL)?;
+    let fs = produce_fs(&fs_type_str, data.as_deref(), &source).inspect_err(|e| {
+        log::error!("Failed to produce filesystem: {:?}", e);
+    })?;
+
+    let abs_path = target_inode.absolute_path()?;
+
+    // An empty remainder means `abs_path` itself is an existing mount point.
+    let result = ProcessManager::current_mntns().get_mount_point(&abs_path);
     if let Some((_, rest, _fs)) = result {
         if rest.is_empty() {
             return Err(SystemError::EBUSY);
         }
     }
-    // 移至IndexNode.mount()来记录
-    return inode.mount(fs, mount_flags);
+    return target_inode.mount(fs, mount_flags);
+}
+/// Copies an optional NUL-terminated string argument out of user space.
+///
+/// # Returns
+/// - `Ok(None)` when `raw` is `None`
+/// - `Ok(Some(string))` when the string was read and converted successfully
+///
+/// # Errors
+/// - the error from `check_and_clone_cstr` (also logged) if the user read fails;
+///   at most `MAX_PATHLEN` is passed as the length limit
+/// - `EINVAL` if the bytes cannot be converted into a `String`
+#[inline(never)]
+fn copy_mount_string(raw: Option<*const u8>) -> Result<Option<String>, SystemError> {
+    match raw {
+        None => Ok(None),
+        Some(user_ptr) => {
+            let cstr = user_access::check_and_clone_cstr(user_ptr, Some(MAX_PATHLEN))
+                .inspect_err(|e| {
+                    log::error!("Failed to read mount string: {:?}", e);
+                })?;
+            let text = cstr.into_string().map_err(|_| SystemError::EINVAL)?;
+            Ok(Some(text))
+        }
+    }
 }

+ 122 - 9
kernel/src/process/namespace/mnt.rs

@@ -1,7 +1,7 @@
 use crate::{
     filesystem::vfs::{
         mount::{MountFlags, MountList, MountPath},
-        FileSystem, IndexNode, MountFS,
+        FileSystem, IndexNode, InodeId, MountFS,
     },
     libs::{once::Once, spinlock::SpinLock},
     process::{fork::CloneFlags, namespace::NamespaceType, ProcessManager},
@@ -39,8 +39,6 @@ pub fn root_mnt_namespace() -> Arc<MntNamespace> {
 pub struct MntNamespace {
     ns_common: NsCommon,
     self_ref: Weak<MntNamespace>,
-    /// 父namespace的弱引用
-    _parent: Option<Weak<MntNamespace>>,
     _user_ns: Arc<UserNamespace>,
     root_mountfs: Arc<MountFS>,
     inner: SpinLock<InnerMntNamespace>,
@@ -73,7 +71,6 @@ impl MntNamespace {
         let result = Arc::new_cyclic(|self_ref| Self {
             ns_common: NsCommon::new(0, NamespaceType::Mount),
             self_ref: self_ref.clone(),
-            _parent: None,
             _user_ns: super::user_namespace::INIT_USER_NAMESPACE.clone(),
             root_mountfs: ramfs.clone(),
             inner: SpinLock::new(InnerMntNamespace {
@@ -84,7 +81,7 @@ impl MntNamespace {
 
         ramfs.set_namespace(Arc::downgrade(&result));
         result
-            .add_mount(Arc::new(MountPath::from("/")), ramfs)
+            .add_mount(None, Arc::new(MountPath::from("/")), ramfs)
             .expect("Failed to add root mount");
 
         return result;
@@ -100,7 +97,32 @@ impl MntNamespace {
         self_mut.root_mountfs = new_root.clone();
         let (path, _, _) = inner_guard.mount_list.get_mount_point("/").unwrap();
 
-        inner_guard.mount_list.insert(path, new_root);
+        inner_guard.mount_list.insert(None, path, new_root);
+
+        // update mount list ino
+    }
+
+    /// Creates a new mount namespace, one level below `self`, rooted at `new_root`.
+    ///
+    /// The new namespace starts with an empty mount list, into which `new_root` is
+    /// registered at "/" (without an inode id). `new_root` is also pointed back at
+    /// the new namespace via `set_namespace`.
+    ///
+    /// # Panics
+    /// Panics if registering the root mount fails.
+    fn copy_with_mountfs(&self, new_root: Arc<MountFS>, _user_ns: Arc<UserNamespace>) -> Arc<Self> {
+        let child_common = {
+            let mut common = self.ns_common.clone();
+            common.level += 1;
+            common
+        };
+        let inner = SpinLock::new(InnerMntNamespace {
+            _dead: false,
+            mount_list: MountList::new(),
+        });
+
+        let child = Arc::new_cyclic(|weak_self| Self {
+            ns_common: child_common,
+            self_ref: weak_self.clone(),
+            _user_ns,
+            root_mountfs: new_root.clone(),
+            inner,
+        });
+
+        new_root.set_namespace(Arc::downgrade(&child));
+        let root_path = Arc::new(MountPath::from("/"));
+        child
+            .add_mount(None, root_path, new_root)
+            .expect("Failed to add root mount");
+
+        child
     }
 
     /// Creates a copy of the mount namespace for process cloning.
@@ -119,17 +141,94 @@ impl MntNamespace {
     /// # Behavior
     /// - If `CLONE_NEWNS` is not set, returns the current mount namespace
     /// - If `CLONE_NEWNS` is set, creates a new mount namespace holding copies of the current namespace's mounts
+    #[inline(never)]
     pub fn copy_mnt_ns(
         &self,
         clone_flags: &CloneFlags,
-        _user_ns: Arc<UserNamespace>,
+        user_ns: Arc<UserNamespace>,
     ) -> Result<Arc<MntNamespace>, SystemError> {
         if !clone_flags.contains(CloneFlags::CLONE_NEWNS) {
             // Return the current mount namespace if CLONE_NEWNS is not set
             return Ok(self.self_ref.upgrade().unwrap());
         }
+        // Keep this namespace's inner state locked for the whole copy; the mount
+        // list is consulted repeatedly below.
+        let inner = self.inner.lock();
+
+        let old_root_mntfs = self.root_mntfs().clone();
+
+        // The root mount has no mountpoint inode, so it is copied on its own.
+        // `copy_with_mountfs` registers it at "/" in the new namespace, so it must
+        // not be added a second time here.
+        let new_root_mntfs = old_root_mntfs.deepcopy(None);
+        let new_mntns = self.copy_with_mountfs(new_root_mntfs, user_ns);
+
+        // Work list of mounts still to be copied, seeded with every mount that
+        // sits directly on the old root filesystem.
+        let mut pending: Vec<MountFSCopyInfo> = old_root_mntfs
+            .mountpoints()
+            .iter()
+            .map(|(ino, mfs)| MountFSCopyInfo {
+                old_mount_fs: mfs.clone(),
+                parent_mount_fs: new_mntns.root_mntfs().clone(),
+                self_mp_inode_id: *ino,
+                mount_path: inner
+                    .mount_list
+                    .get_mount_path_by_ino(*ino)
+                    .unwrap_or_else(|| {
+                        panic!(
+                            "mount_path not found for inode {:?}, mfs name: {}",
+                            ino,
+                            mfs.name()
+                        )
+                    }),
+            })
+            .collect();
 
-        todo!("Implement MntNamespace::copy_mnt_ns");
+        // Copy the mounts one by one; each copied mount contributes its own
+        // children to the work list, so the whole mount tree is visited.
+        while let Some(data) = pending.pop() {
+            // Re-create the mountpoint inode so that it belongs to the new parent,
+            // then hang a copy of the mounted filesystem on it.
+            let old_self_mp = data
+                .old_mount_fs
+                .self_mountpoint()
+                .expect("a non-root mount must have a mountpoint inode");
+            let new_self_mp = old_self_mp.clone_with_new_mount_fs(data.parent_mount_fs.clone());
+            let new_mount_fs = data.old_mount_fs.deepcopy(Some(new_self_mp));
+            // NOTE(review): unlike the root copy, `set_namespace` is never called on
+            // `new_mount_fs`; confirm whether copied child mounts need it.
+            data.parent_mount_fs
+                .add_mount(data.self_mp_inode_id, new_mount_fs.clone())
+                .expect("Failed to add mount");
+            new_mntns
+                .add_mount(
+                    Some(data.self_mp_inode_id),
+                    data.mount_path.clone(),
+                    new_mount_fs.clone(),
+                )
+                .expect("Failed to add mount to mount namespace");
+
+            // Schedule the children of the original mount; their new parent is the
+            // copy that was just created.
+            pending.extend(data.old_mount_fs.mountpoints().iter().map(
+                |(child_ino, child_mfs)| MountFSCopyInfo {
+                    old_mount_fs: child_mfs.clone(),
+                    parent_mount_fs: new_mount_fs.clone(),
+                    self_mp_inode_id: *child_ino,
+                    mount_path: inner
+                        .mount_list
+                        .get_mount_path_by_ino(*child_ino)
+                        .expect("mount_path not found"),
+                },
+            ));
+        }
+
+        // todo: register the new namespace in procfs
+
+        // Hand back the newly created mount namespace.
+        Ok(new_mntns)
     }
 
     pub fn root_mntfs(&self) -> &Arc<MountFS> {
@@ -143,10 +242,11 @@ impl MntNamespace {
 
+    /// Records `mntfs` as mounted at `mount_path` in this namespace's mount list.
+    ///
+    /// `ino` is forwarded to `MountList::insert`; callers in this file pass `None`
+    /// for the root mount. This function currently always returns `Ok(())`.
     pub fn add_mount(
         &self,
+        ino: Option<InodeId>,
         mount_path: Arc<MountPath>,
         mntfs: Arc<MountFS>,
     ) -> Result<(), SystemError> {
-        self.inner.lock().mount_list.insert(mount_path, mntfs);
+        self.inner.lock().mount_list.insert(ino, mount_path, mntfs);
         return Ok(());
     }
 
@@ -276,3 +376,16 @@ impl ProcessManager {
         }
     }
 }
+
+/// One pending unit of work while `copy_mnt_ns` duplicates a mount tree.
+struct MountFSCopyInfo {
+    /// The mount in the source namespace that is to be copied.
+    old_mount_fs: Arc<MountFS>,
+    /// The already-copied mount in the new namespace that the copy is attached to.
+    parent_mount_fs: Arc<MountFS>,
+    /// Inode id under which `old_mount_fs` is registered in its parent's mount-point table.
+    self_mp_inode_id: InodeId,
+    /// Mount path recorded for that inode id in the source namespace's mount list.
+    mount_path: Arc<MountPath>,
+}
+
+// impl Drop for MntNamespace {
+//     fn drop(&mut self) {
+//         log::warn!("mntns (level: {}) dropped", self.ns_common.level);
+//     }
+// }

+ 1 - 1
kernel/src/process/namespace/nsproxy.rs

@@ -153,7 +153,7 @@ pub(super) fn create_new_namespaces(
 
 /// https://code.dragonos.org.cn/xref/linux-6.6.21/include/linux/ns_common.h#9
 /// 融合了 NamespaceBase 的公共字段
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct NsCommon {
     /// 层级(root = 0)
     pub level: u32,