Browse Source

feat(mm): 实现缺页中断处理,支持页面延迟分配和写时拷贝,以及用户栈自动拓展 (#715)

* 实现缺页中断处理

* 完善页表拷贝逻辑

* 优化代码结构

* 完善缺页异常信息

* 修改大页映射逻辑

* 修正大页映射错误

* 添加缺页中断支持标志

* 实现用户栈自动拓展功能
MemoryShore 10 months ago
parent
commit
a17651b14b

+ 1 - 0
kernel/crates/klog_types/src/lib.rs

@@ -121,6 +121,7 @@ pub enum AllocatorLogType {
     Alloc(AllocLogItem),
     AllocZeroed(AllocLogItem),
     Free(AllocLogItem),
+    LazyAlloc(AllocLogItem),
 }
 
 #[repr(C)]

+ 15 - 2
kernel/src/arch/riscv64/mm/mod.rs

@@ -42,8 +42,6 @@ pub(self) static INNER_ALLOCATOR: SpinLock<Option<BuddyAllocator<MMArch>>> = Spi
 pub struct RiscV64MMArch;
 
 impl RiscV64MMArch {
-    pub const ENTRY_FLAG_GLOBAL: usize = 1 << 5;
-
     /// 使远程cpu的TLB中,指定地址范围的页失效
     pub fn remote_invalidate_page(
         cpu: ProcessorId,
@@ -85,6 +83,9 @@ const KERNEL_TOP_PAGE_ENTRY_NO: usize = (RiscV64MMArch::PHYS_OFFSET
     >> (RiscV64MMArch::ENTRY_ADDRESS_SHIFT - RiscV64MMArch::PAGE_ENTRY_SHIFT);
 
 impl MemoryManagementArch for RiscV64MMArch {
+    /// riscv64暂不支持缺页中断
+    const PAGE_FAULT_ENABLED: bool = false;
+
     const PAGE_SHIFT: usize = 12;
 
     const PAGE_ENTRY_SHIFT: usize = 9;
@@ -119,6 +120,7 @@ impl MemoryManagementArch for RiscV64MMArch {
     const ENTRY_FLAG_EXEC: usize = (1 << 3);
     const ENTRY_FLAG_ACCESSED: usize = (1 << 6);
     const ENTRY_FLAG_DIRTY: usize = (1 << 7);
+    const ENTRY_FLAG_GLOBAL: usize = (1 << 5);
 
     const PHYS_OFFSET: usize = 0xffff_ffc0_0000_0000;
     const KERNEL_LINK_OFFSET: usize = 0x1000000;
@@ -139,6 +141,8 @@ impl MemoryManagementArch for RiscV64MMArch {
     /// 设置1g的MMIO空间
     const MMIO_SIZE: usize = 1 << PAGE_1G_SHIFT;
 
+    const ENTRY_FLAG_HUGE_PAGE: usize = Self::ENTRY_FLAG_PRESENT | Self::ENTRY_FLAG_READWRITE;
+
     #[inline(never)]
     unsafe fn init() {
         riscv_mm_init().expect("init kernel memory management architecture failed");
@@ -239,6 +243,15 @@ impl MemoryManagementArch for RiscV64MMArch {
         let r = ((ppn & ((1 << 54) - 1)) << 10) | page_flags;
         return r;
     }
+
+    fn vma_access_permitted(
+        _vma: alloc::sync::Arc<crate::mm::ucontext::LockedVMA>,
+        _write: bool,
+        _execute: bool,
+        _foreign: bool,
+    ) -> bool {
+        true
+    }
 }
 
 impl VirtAddr {

+ 96 - 35
kernel/src/arch/x86_64/interrupt/trap.rs

@@ -1,8 +1,12 @@
 use system_error::SystemError;
 
 use crate::{
-    arch::CurrentIrqArch, exception::InterruptArch, kerror, kwarn, mm::VirtAddr, print,
-    process::ProcessManager, smp::core::smp_get_processor_id,
+    arch::{CurrentIrqArch, MMArch},
+    exception::InterruptArch,
+    kerror, kwarn,
+    mm::VirtAddr,
+    process::ProcessManager,
+    smp::core::smp_get_processor_id,
 };
 
 use super::{
@@ -33,6 +37,46 @@ extern "C" {
     fn trap_virtualization_exception();
 }
 
+bitflags! {
+    pub struct TrapNr: u64 {
+        const X86_TRAP_DE = 0;
+        const X86_TRAP_DB = 1;
+        const X86_TRAP_NMI = 2;
+        const X86_TRAP_BP = 3;
+        const X86_TRAP_OF = 4;
+        const X86_TRAP_BR = 5;
+        const X86_TRAP_UD = 6;
+        const X86_TRAP_NM = 7;
+        const X86_TRAP_DF = 8;
+        const X86_TRAP_OLD_MF = 9;
+        const X86_TRAP_TS = 10;
+        const X86_TRAP_NP = 11;
+        const X86_TRAP_SS = 12;
+        const X86_TRAP_GP = 13;
+        const X86_TRAP_PF = 14;
+        const X86_TRAP_SPURIOUS = 15;
+        const X86_TRAP_MF = 16;
+        const X86_TRAP_AC = 17;
+        const X86_TRAP_MC = 18;
+        const X86_TRAP_XF = 19;
+        const X86_TRAP_VE = 20;
+        const X86_TRAP_CP = 21;
+        const X86_TRAP_VC = 29;
+        const X86_TRAP_IRET = 32;
+    }
+
+        pub struct X86PfErrorCode : u32{
+        const X86_PF_PROT = 1 << 0;
+        const X86_PF_WRITE = 1 << 1;
+        const X86_PF_USER = 1 << 2;
+        const X86_PF_RSVD = 1 << 3;
+        const X86_PF_INSTR = 1 << 4;
+        const X86_PF_PK = 1 << 5;
+        const X86_PF_SHSTK = 1 << 6;
+        const X86_PF_SGX = 1 << 15;
+    }
+}
+
 #[inline(never)]
 pub fn arch_trap_init() -> Result<(), SystemError> {
     unsafe {
@@ -319,42 +363,59 @@ Segment Selector Index: {:#x}\n
 /// 处理页错误 14 #PF
 #[no_mangle]
 unsafe extern "C" fn do_page_fault(regs: &'static TrapFrame, error_code: u64) {
-    kerror!(
-        "do_page_fault(14), \tError code: {:#x},\trsp: {:#x},\trip: {:#x},\t CPU: {}, \tpid: {:?}, \nFault Address: {:#x}",
-        error_code,
-        regs.rsp,
-        regs.rip,
-        smp_get_processor_id().data(),
-        ProcessManager::current_pid(),
-        x86::controlregs::cr2()
-    );
-
-    if (error_code & 0x01) == 0 {
-        print!("Page Not Present,\t");
-    }
-    if (error_code & 0x02) != 0 {
-        print!("Write Access,\t");
-    } else {
-        print!("Read Access,\t");
-    }
-
-    if (error_code & 0x04) != 0 {
-        print!("Fault in user(3),\t");
+    // kerror!(
+    //     "do_page_fault(14), \tError code: {:#x},\trsp: {:#x},\trip: {:#x},\t CPU: {}, \tpid: {:?}, \nFault Address: {:#x}",
+    //     error_code,
+    //     regs.rsp,
+    //     regs.rip,
+    //     smp_get_processor_id().data(),
+    //     ProcessManager::current_pid(),
+    //     x86::controlregs::cr2()
+    // );
+
+    // if (error_code & 0x01) == 0 {
+    //     print!("Page Not Present,\t");
+    // }
+    // if (error_code & 0x02) != 0 {
+    //     print!("Write Access,\t");
+    // } else {
+    //     print!("Read Access,\t");
+    // }
+
+    // if (error_code & 0x04) != 0 {
+    //     print!("Fault in user(3),\t");
+    // } else {
+    //     print!("Fault in supervisor(0,1,2),\t");
+    // }
+
+    // if (error_code & 0x08) != 0 {
+    //     print!("Reserved bit violation cause fault,\t");
+    // }
+
+    // if (error_code & 0x10) != 0 {
+    //     print!("Instruction fetch cause fault,\t");
+    // }
+    // print!("\n");
+
+    // CurrentIrqArch::interrupt_enable();
+    // panic!("Page Fault");
+    CurrentIrqArch::interrupt_disable();
+    let address = x86::controlregs::cr2();
+    // crate::kinfo!(
+    //     "fault address: {:#x}, error_code: {:#b}, pid: {}\n",
+    //     address,
+    //     error_code,
+    //     ProcessManager::current_pid().data()
+    // );
+
+    let address = VirtAddr::new(address);
+    let error_code = X86PfErrorCode::from_bits_truncate(error_code as u32);
+    if address.check_user() {
+        MMArch::do_user_addr_fault(regs, error_code, address);
     } else {
-        print!("Fault in supervisor(0,1,2),\t");
+        MMArch::do_kern_addr_fault(regs, error_code, address);
     }
-
-    if (error_code & 0x08) != 0 {
-        print!("Reserved bit violation cause fault,\t");
-    }
-
-    if (error_code & 0x10) != 0 {
-        print!("Instruction fetch cause fault,\t");
-    }
-    print!("\n");
-
     CurrentIrqArch::interrupt_enable();
-    panic!("Page Fault");
 }
 
 /// 处理x87 FPU错误 16 #MF

+ 300 - 0
kernel/src/arch/x86_64/mm/fault.rs

@@ -0,0 +1,300 @@
+use core::{
+    intrinsics::{likely, unlikely},
+    panic,
+};
+
+use alloc::sync::Arc;
+use x86::{bits64::rflags::RFlags, controlregs::Cr4};
+
+use crate::{
+    arch::{
+        interrupt::{trap::X86PfErrorCode, TrapFrame},
+        mm::{MemoryManagementArch, X86_64MMArch},
+        CurrentIrqArch, MMArch,
+    },
+    exception::InterruptArch,
+    kerror,
+    mm::{
+        fault::{FaultFlags, PageFaultHandler, PageFaultMessage},
+        ucontext::{AddressSpace, LockedVMA},
+        VirtAddr, VmFaultReason, VmFlags,
+    },
+};
+
+use super::LockedFrameAllocator;
+
+pub type PageMapper =
+    crate::mm::page::PageMapper<crate::arch::x86_64::mm::X86_64MMArch, LockedFrameAllocator>;
+
+impl X86_64MMArch {
+    pub fn vma_access_error(vma: Arc<LockedVMA>, error_code: X86PfErrorCode) -> bool {
+        let vm_flags = *vma.lock().vm_flags();
+        let foreign = false;
+        if error_code.contains(X86PfErrorCode::X86_PF_PK) {
+            return true;
+        }
+
+        if unlikely(error_code.contains(X86PfErrorCode::X86_PF_SGX)) {
+            return true;
+        }
+
+        if !Self::vma_access_permitted(
+            vma.clone(),
+            error_code.contains(X86PfErrorCode::X86_PF_WRITE),
+            error_code.contains(X86PfErrorCode::X86_PF_INSTR),
+            foreign,
+        ) {
+            return true;
+        }
+
+        if error_code.contains(X86PfErrorCode::X86_PF_WRITE) {
+            if unlikely(!vm_flags.contains(VmFlags::VM_WRITE)) {
+                return true;
+            }
+            return false;
+        }
+
+        if unlikely(error_code.contains(X86PfErrorCode::X86_PF_PROT)) {
+            return true;
+        }
+
+        if unlikely(!vma.is_accessible()) {
+            return true;
+        }
+        false
+    }
+
+    pub fn show_fault_oops(
+        regs: &'static TrapFrame,
+        error_code: X86PfErrorCode,
+        address: VirtAddr,
+    ) {
+        let mapper =
+            unsafe { PageMapper::current(crate::mm::PageTableKind::User, LockedFrameAllocator) };
+        if let Some(entry) = mapper.get_entry(address, 0) {
+            if entry.present() {
+                if !entry.flags().has_execute() {
+                    kerror!("kernel tried to execute NX-protected page - exploit attempt?");
+                } else if mapper.table().phys().data() & MMArch::ENTRY_FLAG_USER != 0
+                    && unsafe { x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_SMEP) }
+                {
+                    kerror!("unable to execute userspace code (SMEP?)");
+                }
+            }
+        }
+        if address.data() < X86_64MMArch::PAGE_SIZE && !regs.is_from_user() {
+            kerror!(
+                "BUG: kernel NULL pointer dereference, address: {:#x}",
+                address.data()
+            );
+        } else {
+            kerror!(
+                "BUG: unable to handle page fault for address: {:#x}",
+                address.data()
+            );
+        }
+
+        kerror!(
+            "#PF: {} {} in {} mode\n",
+            if error_code.contains(X86PfErrorCode::X86_PF_USER) {
+                "user"
+            } else {
+                "supervisor"
+            },
+            if error_code.contains(X86PfErrorCode::X86_PF_INSTR) {
+                "instruction fetch"
+            } else if error_code.contains(X86PfErrorCode::X86_PF_WRITE) {
+                "write access"
+            } else {
+                "read access"
+            },
+            if regs.is_from_user() {
+                "user"
+            } else {
+                "kernel"
+            }
+        );
+        kerror!(
+            "#PF: error_code({:#04x}) - {}\n",
+            error_code,
+            if !error_code.contains(X86PfErrorCode::X86_PF_PROT) {
+                "not-present page"
+            } else if error_code.contains(X86PfErrorCode::X86_PF_RSVD) {
+                "reserved bit violation"
+            } else if error_code.contains(X86PfErrorCode::X86_PF_PK) {
+                "protection keys violation"
+            } else {
+                "permissions violation"
+            }
+        );
+    }
+
+    pub fn page_fault_oops(
+        regs: &'static TrapFrame,
+        error_code: X86PfErrorCode,
+        address: VirtAddr,
+    ) {
+        if regs.is_from_user() {
+            Self::show_fault_oops(regs, error_code, address);
+        }
+        panic!()
+    }
+
+    /// 内核态缺页异常处理
+    /// ## 参数
+    ///
+    /// - `regs`: 中断栈帧
+    /// - `error_code`: 错误标志
+    /// - `address`: 发生缺页异常的虚拟地址
+    pub fn do_kern_addr_fault(
+        _regs: &'static TrapFrame,
+        error_code: X86PfErrorCode,
+        address: VirtAddr,
+    ) {
+        panic!(
+            "do_kern_addr_fault has not yet been implemented, 
+        fault address: {:#x}, 
+        error_code: {:#b}, 
+        pid: {}\n",
+            address.data(),
+            error_code,
+            crate::process::ProcessManager::current_pid().data()
+        );
+        //TODO https://code.dragonos.org.cn/xref/linux-6.6.21/arch/x86/mm/fault.c#do_kern_addr_fault
+    }
+
+    /// 用户态缺页异常处理
+    /// ## 参数
+    ///
+    /// - `regs`: 中断栈帧
+    /// - `error_code`: 错误标志
+    /// - `address`: 发生缺页异常的虚拟地址
+    pub unsafe fn do_user_addr_fault(
+        regs: &'static TrapFrame,
+        error_code: X86PfErrorCode,
+        address: VirtAddr,
+    ) {
+        let rflags = RFlags::from_bits_truncate(regs.rflags);
+        let mut flags: FaultFlags = FaultFlags::FAULT_FLAG_ALLOW_RETRY
+            | FaultFlags::FAULT_FLAG_KILLABLE
+            | FaultFlags::FAULT_FLAG_INTERRUPTIBLE;
+
+        if error_code & (X86PfErrorCode::X86_PF_USER | X86PfErrorCode::X86_PF_INSTR)
+            == X86PfErrorCode::X86_PF_INSTR
+        {
+            Self::page_fault_oops(regs, error_code, address);
+        }
+
+        let feature = x86::cpuid::CpuId::new()
+            .get_extended_feature_info()
+            .unwrap();
+        if unlikely(
+            feature.has_smap()
+                && !error_code.contains(X86PfErrorCode::X86_PF_USER)
+                && rflags.contains(RFlags::FLAGS_AC),
+        ) {
+            Self::page_fault_oops(regs, error_code, address);
+        }
+
+        if unlikely(error_code.contains(X86PfErrorCode::X86_PF_RSVD)) {
+            // TODO https://code.dragonos.org.cn/xref/linux-6.6.21/arch/x86/mm/fault.c#pgtable_bad
+            panic!(
+                "Reserved bits are never expected to be set, error_code: {:#b}, address: {:#x}",
+                error_code,
+                address.data()
+            );
+        }
+
+        if regs.is_from_user() {
+            unsafe { CurrentIrqArch::interrupt_enable() };
+            flags |= FaultFlags::FAULT_FLAG_USER;
+        } else if rflags.contains(RFlags::FLAGS_IF) {
+            unsafe { CurrentIrqArch::interrupt_enable() };
+        }
+
+        if error_code.contains(X86PfErrorCode::X86_PF_SHSTK) {
+            flags |= FaultFlags::FAULT_FLAG_WRITE;
+        }
+        if error_code.contains(X86PfErrorCode::X86_PF_WRITE) {
+            flags |= FaultFlags::FAULT_FLAG_WRITE;
+        }
+        if error_code.contains(X86PfErrorCode::X86_PF_INSTR) {
+            flags |= FaultFlags::FAULT_FLAG_INSTRUCTION;
+        }
+
+        let current_address_space: Arc<AddressSpace> = AddressSpace::current().unwrap();
+        let mut space_guard = current_address_space.write();
+        let mut fault;
+        loop {
+            let vma = space_guard.mappings.find_nearest(address);
+            // let vma = space_guard.mappings.contains(address);
+
+            let vma = vma.unwrap_or_else(|| {
+                panic!(
+                    "can not find nearest vma, error_code: {:#b}, address: {:#x}",
+                    error_code,
+                    address.data(),
+                )
+            });
+            let guard = vma.lock();
+            let region = *guard.region();
+            let vm_flags = *guard.vm_flags();
+            drop(guard);
+
+            if !region.contains(address) {
+                if vm_flags.contains(VmFlags::VM_GROWSDOWN) {
+                    space_guard
+                        .extend_stack(region.start() - address)
+                        .unwrap_or_else(|_| {
+                            panic!(
+                                "user stack extend failed, error_code: {:#b}, address: {:#x}",
+                                error_code,
+                                address.data(),
+                            )
+                        });
+                } else {
+                    panic!(
+                        "No mapped vma, error_code: {:#b}, address: {:#x}",
+                        error_code,
+                        address.data(),
+                    )
+                }
+            }
+
+            if unlikely(Self::vma_access_error(vma.clone(), error_code)) {
+                panic!(
+                    "vma access error, error_code: {:#b}, address: {:#x}",
+                    error_code,
+                    address.data(),
+                );
+            }
+            let mapper = &mut space_guard.user_mapper.utable;
+
+            fault = PageFaultHandler::handle_mm_fault(
+                PageFaultMessage::new(vma.clone(), address, flags),
+                mapper,
+            );
+
+            if fault.contains(VmFaultReason::VM_FAULT_COMPLETED) {
+                return;
+            }
+
+            if unlikely(fault.contains(VmFaultReason::VM_FAULT_RETRY)) {
+                flags |= FaultFlags::FAULT_FLAG_TRIED;
+            } else {
+                break;
+            }
+        }
+
+        let vm_fault_error = VmFaultReason::VM_FAULT_OOM
+            | VmFaultReason::VM_FAULT_SIGBUS
+            | VmFaultReason::VM_FAULT_SIGSEGV
+            | VmFaultReason::VM_FAULT_HWPOISON
+            | VmFaultReason::VM_FAULT_HWPOISON_LARGE
+            | VmFaultReason::VM_FAULT_FALLBACK;
+
+        if likely(!fault.contains(vm_fault_error)) {
+            panic!("fault error: {:?}", fault)
+        }
+    }
+}

+ 29 - 7
kernel/src/arch/x86_64/mm/mod.rs

@@ -1,6 +1,9 @@
 pub mod barrier;
 pub mod bump;
+pub mod fault;
+pub mod pkru;
 
+use alloc::sync::Arc;
 use alloc::vec::Vec;
 use hashbrown::HashSet;
 use x86::time::rdtsc;
@@ -17,6 +20,7 @@ use crate::libs::spinlock::SpinLock;
 
 use crate::mm::allocator::page_frame::{FrameAllocator, PageFrameCount, PageFrameUsage};
 use crate::mm::memblock::mem_block_manager;
+use crate::mm::ucontext::LockedVMA;
 use crate::{
     arch::MMArch,
     mm::allocator::{buddy::BuddyAllocator, bump::BumpAllocator},
@@ -44,10 +48,6 @@ pub type PageMapper =
 /// 初始的CR3寄存器的值,用于内存管理初始化时,创建的第一个内核页表的位置
 static mut INITIAL_CR3_VALUE: PhysAddr = PhysAddr::new(0);
 
-/// 内核的第一个页表在pml4中的索引
-/// 顶级页表的[256, 512)项是内核的页表
-static KERNEL_PML4E_NO: usize = (X86_64MMArch::PHYS_OFFSET & ((1 << 48) - 1)) >> 39;
-
 static INNER_ALLOCATOR: SpinLock<Option<BuddyAllocator<MMArch>>> = SpinLock::new(None);
 
 #[derive(Clone, Copy, Debug)]
@@ -70,6 +70,8 @@ pub struct X86_64MMArch;
 static XD_RESERVED: AtomicBool = AtomicBool::new(false);
 
 impl MemoryManagementArch for X86_64MMArch {
+    /// X86目前支持缺页中断
+    const PAGE_FAULT_ENABLED: bool = true;
     /// 4K页
     const PAGE_SHIFT: usize = 12;
 
@@ -104,8 +106,10 @@ impl MemoryManagementArch for X86_64MMArch {
     /// x86_64不存在EXEC标志位,只有NO_EXEC(XD)标志位
     const ENTRY_FLAG_EXEC: usize = 0;
 
-    const ENTRY_FLAG_ACCESSED: usize = 0;
-    const ENTRY_FLAG_DIRTY: usize = 0;
+    const ENTRY_FLAG_ACCESSED: usize = 1 << 5;
+    const ENTRY_FLAG_DIRTY: usize = 1 << 6;
+    const ENTRY_FLAG_HUGE_PAGE: usize = 1 << 7;
+    const ENTRY_FLAG_GLOBAL: usize = 1 << 8;
 
     /// 物理地址与虚拟地址的偏移量
     /// 0xffff_8000_0000_0000
@@ -237,7 +241,7 @@ impl MemoryManagementArch for X86_64MMArch {
         };
 
         // 复制内核的映射
-        for pml4_entry_no in KERNEL_PML4E_NO..512 {
+        for pml4_entry_no in MMArch::PAGE_KERNEL_INDEX..MMArch::PAGE_ENTRY_NUM {
             copy_mapping(pml4_entry_no);
         }
 
@@ -262,6 +266,9 @@ impl MemoryManagementArch for X86_64MMArch {
 
     const PAGE_ENTRY_MASK: usize = Self::PAGE_ENTRY_NUM - 1;
 
+    const PAGE_KERNEL_INDEX: usize = (Self::PHYS_OFFSET & Self::PAGE_ADDRESS_MASK)
+        >> (Self::PAGE_ADDRESS_SHIFT - Self::PAGE_ENTRY_SHIFT);
+
     const PAGE_NEGATIVE_MASK: usize = !((Self::PAGE_ADDRESS_SIZE) - 1);
 
     const ENTRY_ADDRESS_SIZE: usize = 1 << Self::ENTRY_ADDRESS_SHIFT;
@@ -302,6 +309,21 @@ impl MemoryManagementArch for X86_64MMArch {
     fn make_entry(paddr: PhysAddr, page_flags: usize) -> usize {
         return paddr.data() | page_flags;
     }
+
+    fn vma_access_permitted(
+        vma: Arc<LockedVMA>,
+        write: bool,
+        execute: bool,
+        foreign: bool,
+    ) -> bool {
+        if execute {
+            return true;
+        }
+        if foreign | vma.is_foreign() {
+            return true;
+        }
+        pkru::pkru_allows_pkey(pkru::vma_pkey(vma), write)
+    }
 }
 
 impl X86_64MMArch {

+ 56 - 0
kernel/src/arch/x86_64/mm/pkru.rs

@@ -0,0 +1,56 @@
+use alloc::sync::Arc;
+
+use crate::mm::ucontext::LockedVMA;
+
+const VM_PKEY_SHIFT: usize = 32;
+
+/// X86_64架构的ProtectionKey使用32、33、34、35四个比特位
+const PKEY_MASK: usize = 1 << 32 | 1 << 33 | 1 << 34 | 1 << 35;
+
+/// 获取vma的protection_key
+///
+/// ## 参数
+///
+/// - `vma`: VMA
+///
+/// ## 返回值
+/// - `u16`: vma的protection_key
+pub fn vma_pkey(vma: Arc<LockedVMA>) -> u16 {
+    let guard = vma.lock();
+    ((guard.vm_flags().bits() & PKEY_MASK as u64) >> VM_PKEY_SHIFT) as u16
+}
+
+// TODO pkru实现参考:https://code.dragonos.org.cn/xref/linux-6.6.21/arch/x86/include/asm/pkru.h
+
+const PKRU_AD_BIT: u16 = 0x1;
+const PKRU_WD_BIT: u16 = 0x2;
+const PKRU_BITS_PER_PKEY: u32 = 2;
+
+pub fn pkru_allows_pkey(pkey: u16, write: bool) -> bool {
+    let pkru = read_pkru();
+
+    if !pkru_allows_read(pkru, pkey) {
+        return false;
+    }
+    if write & !pkru_allows_write(pkru, pkey) {
+        return false;
+    }
+
+    true
+}
+
+pub fn pkru_allows_read(pkru: u32, pkey: u16) -> bool {
+    let pkru_pkey_bits: u32 = pkey as u32 * PKRU_BITS_PER_PKEY;
+    pkru & ((PKRU_AD_BIT as u32) << pkru_pkey_bits) > 0
+}
+
+pub fn pkru_allows_write(pkru: u32, pkey: u16) -> bool {
+    let pkru_pkey_bits: u32 = pkey as u32 * PKRU_BITS_PER_PKEY;
+    pkru & (((PKRU_AD_BIT | PKRU_WD_BIT) as u32) << pkru_pkey_bits) > 0
+}
+
/// Read the current value of the PKRU register.
///
/// TODO: implement the actual RDPKRU read; see
/// https://code.dragonos.org.cn/xref/linux-6.6.21/arch/x86/include/asm/pkru.h?fi=read_pkru#read_pkru
pub fn read_pkru() -> u32 {
    // Placeholder: report an all-zero PKRU until the real read is wired up.
    0
}

+ 13 - 5
kernel/src/arch/x86_64/process/syscall.rs

@@ -74,18 +74,26 @@ impl Syscall {
         param.init_info_mut().envs = envp;
 
         // 把proc_init_info写到用户栈上
-
+        let mut ustack_message = unsafe {
+            address_space
+                .write()
+                .user_stack_mut()
+                .expect("No user stack found")
+                .clone_info_only()
+        };
         let (user_sp, argv_ptr) = unsafe {
             param
                 .init_info()
                 .push_at(
-                    address_space
-                        .write()
-                        .user_stack_mut()
-                        .expect("No user stack found"),
+                    // address_space
+                    //     .write()
+                    //     .user_stack_mut()
+                    //     .expect("No user stack found"),
+                    &mut ustack_message,
                 )
                 .expect("Failed to push proc_init_info to user stack")
         };
+        address_space.write().user_stack = Some(ustack_message);
 
         // kdebug!("write proc_init_info to user stack done");
 

+ 3 - 2
kernel/src/libs/elf.rs

@@ -131,6 +131,7 @@ impl ElfLoader {
                 prot_flags,
                 MapFlags::MAP_ANONYMOUS | MapFlags::MAP_FIXED_NOREPLACE,
                 false,
+                true,
             );
             if r.is_err() {
                 kerror!("set_elf_brk: map_anonymous failed, err={:?}", r);
@@ -256,7 +257,7 @@ impl ElfLoader {
             // kdebug!("total_size={}", total_size);
 
             map_addr = user_vm_guard
-                .map_anonymous(addr_to_map, total_size, tmp_prot, *map_flags, false)
+                .map_anonymous(addr_to_map, total_size, tmp_prot, *map_flags, false, true)
                 .map_err(map_err_handler)?
                 .virt_address();
             // kdebug!("map ok: addr_to_map={:?}", addr_to_map);
@@ -288,7 +289,7 @@ impl ElfLoader {
             // kdebug!("total size = 0");
 
             map_addr = user_vm_guard
-                .map_anonymous(addr_to_map, map_size, tmp_prot, *map_flags, false)?
+                .map_anonymous(addr_to_map, map_size, tmp_prot, *map_flags, false, true)?
                 .virt_address();
             // kdebug!(
             //     "map ok: addr_to_map={:?}, map_addr={map_addr:?},beginning_page_offset={beginning_page_offset:?}",

+ 395 - 0
kernel/src/mm/fault.rs

@@ -0,0 +1,395 @@
+use core::{alloc::Layout, intrinsics::unlikely, panic};
+
+use alloc::sync::Arc;
+
+use crate::{
+    arch::{mm::PageMapper, MMArch},
+    mm::{
+        page::{page_manager_lock_irqsave, PageFlags},
+        ucontext::LockedVMA,
+        VirtAddr, VmFaultReason, VmFlags,
+    },
+    process::{ProcessManager, ProcessState},
+};
+
+use crate::mm::MemoryManagementArch;
+
+bitflags! {
+    pub struct FaultFlags: u64{
+    const FAULT_FLAG_WRITE = 1 << 0;
+    const FAULT_FLAG_MKWRITE = 1 << 1;
+    const FAULT_FLAG_ALLOW_RETRY = 1 << 2;
+    const FAULT_FLAG_RETRY_NOWAIT = 1 << 3;
+    const FAULT_FLAG_KILLABLE = 1 << 4;
+    const FAULT_FLAG_TRIED = 1 << 5;
+    const FAULT_FLAG_USER = 1 << 6;
+    const FAULT_FLAG_REMOTE = 1 << 7;
+    const FAULT_FLAG_INSTRUCTION = 1 << 8;
+    const FAULT_FLAG_INTERRUPTIBLE =1 << 9;
+    const FAULT_FLAG_UNSHARE = 1 << 10;
+    const FAULT_FLAG_ORIG_PTE_VALID = 1 << 11;
+    const FAULT_FLAG_VMA_LOCK = 1 << 12;
+    }
+}
+
+/// # 缺页异常信息结构体
+/// 包含了页面错误处理的相关信息,例如出错的地址、VMA等
+#[derive(Debug)]
+pub struct PageFaultMessage {
+    vma: Arc<LockedVMA>,
+    address: VirtAddr,
+    flags: FaultFlags,
+}
+
+impl PageFaultMessage {
+    pub fn new(vma: Arc<LockedVMA>, address: VirtAddr, flags: FaultFlags) -> Self {
+        Self {
+            vma: vma.clone(),
+            address,
+            flags,
+        }
+    }
+
+    #[inline(always)]
+    #[allow(dead_code)]
+    pub fn vma(&self) -> Arc<LockedVMA> {
+        self.vma.clone()
+    }
+
+    #[inline(always)]
+    #[allow(dead_code)]
+    pub fn address(&self) -> VirtAddr {
+        self.address
+    }
+
+    #[inline(always)]
+    #[allow(dead_code)]
+    pub fn address_aligned_down(&self) -> VirtAddr {
+        VirtAddr::new(crate::libs::align::page_align_down(self.address.data()))
+    }
+
+    #[inline(always)]
+    #[allow(dead_code)]
+    pub fn flags(&self) -> FaultFlags {
+        self.flags
+    }
+}
+
+/// 缺页中断处理结构体
+pub struct PageFaultHandler;
+
+impl PageFaultHandler {
+    /// 处理缺页异常
+    /// ## 参数
+    ///
+    /// - `pfm`: 缺页异常信息
+    /// - `mapper`: 页表映射器
+    ///
+    /// ## 返回值
+    /// - VmFaultReason: 页面错误处理信息标志
+    pub unsafe fn handle_mm_fault(pfm: PageFaultMessage, mapper: &mut PageMapper) -> VmFaultReason {
+        let flags = pfm.flags();
+        let vma = pfm.vma();
+        let current_pcb = ProcessManager::current_pcb();
+        let mut guard = current_pcb.sched_info().inner_lock_write_irqsave();
+        guard.set_state(ProcessState::Runnable);
+
+        if !MMArch::vma_access_permitted(
+            vma.clone(),
+            flags.contains(FaultFlags::FAULT_FLAG_WRITE),
+            flags.contains(FaultFlags::FAULT_FLAG_INSTRUCTION),
+            flags.contains(FaultFlags::FAULT_FLAG_REMOTE),
+        ) {
+            return VmFaultReason::VM_FAULT_SIGSEGV;
+        }
+
+        let guard = vma.lock();
+        let vm_flags = *guard.vm_flags();
+        drop(guard);
+        if unlikely(vm_flags.contains(VmFlags::VM_HUGETLB)) {
+            //TODO: 添加handle_hugetlb_fault处理大页缺页异常
+        } else {
+            Self::handle_normal_fault(pfm, mapper);
+        }
+
+        VmFaultReason::VM_FAULT_COMPLETED
+    }
+
+    /// 处理普通页缺页异常
+    /// ## 参数
+    ///
+    /// - `pfm`: 缺页异常信息
+    /// - `mapper`: 页表映射器
+    ///
+    /// ## 返回值
+    /// - VmFaultReason: 页面错误处理信息标志
+    pub unsafe fn handle_normal_fault(
+        pfm: PageFaultMessage,
+        mapper: &mut PageMapper,
+    ) -> VmFaultReason {
+        let address = pfm.address_aligned_down();
+        let vma = pfm.vma.clone();
+        if mapper.get_entry(address, 3).is_none() {
+            mapper
+                .allocate_table(address, 2)
+                .expect("failed to allocate PUD table");
+        }
+        let page_flags = vma.lock().flags();
+
+        for level in 2..=3 {
+            let level = MMArch::PAGE_LEVELS - level;
+            if mapper.get_entry(address, level).is_none() {
+                if vma.is_hugepage() {
+                    if vma.is_anonymous() {
+                        mapper.map_huge_page(address, page_flags);
+                    }
+                } else if mapper.allocate_table(address, level - 1).is_none() {
+                    return VmFaultReason::VM_FAULT_OOM;
+                }
+            }
+        }
+
+        Self::handle_pte_fault(pfm, mapper)
+    }
+
+    /// 处理页表项异常
+    /// ## 参数
+    ///
+    /// - `pfm`: 缺页异常信息
+    /// - `mapper`: 页表映射器
+    ///
+    /// ## 返回值
+    /// - VmFaultReason: 页面错误处理信息标志
+    pub unsafe fn handle_pte_fault(
+        pfm: PageFaultMessage,
+        mapper: &mut PageMapper,
+    ) -> VmFaultReason {
+        let address = pfm.address_aligned_down();
+        let flags = pfm.flags;
+        let vma = pfm.vma.clone();
+        if let Some(mut entry) = mapper.get_entry(address, 0) {
+            if !entry.present() {
+                return Self::do_swap_page(pfm, mapper);
+            }
+            if entry.protnone() && vma.is_accessible() {
+                return Self::do_numa_page(pfm, mapper);
+            }
+            if flags.intersects(FaultFlags::FAULT_FLAG_WRITE | FaultFlags::FAULT_FLAG_UNSHARE) {
+                if !entry.write() {
+                    return Self::do_wp_page(pfm, mapper);
+                } else {
+                    entry.set_flags(PageFlags::from_data(MMArch::ENTRY_FLAG_DIRTY));
+                }
+            }
+        } else if vma.is_anonymous() {
+            return Self::do_anonymous_page(pfm, mapper);
+        } else {
+            return Self::do_fault(pfm, mapper);
+        }
+
+        VmFaultReason::VM_FAULT_COMPLETED
+    }
+
+    /// 处理匿名映射页缺页异常
+    /// ## 参数
+    ///
+    /// - `pfm`: 缺页异常信息
+    /// - `mapper`: 页表映射器
+    ///
+    /// ## 返回值
+    /// - VmFaultReason: 页面错误处理信息标志
+    pub unsafe fn do_anonymous_page(
+        pfm: PageFaultMessage,
+        mapper: &mut PageMapper,
+    ) -> VmFaultReason {
+        let address = pfm.address_aligned_down();
+        let vma = pfm.vma.clone();
+        let guard = vma.lock();
+        if let Some(flush) = mapper.map(address, guard.flags()) {
+            flush.flush();
+            crate::debug::klog::mm::mm_debug_log(
+                klog_types::AllocatorLogType::LazyAlloc(klog_types::AllocLogItem::new(
+                    Layout::from_size_align(MMArch::PAGE_SIZE, MMArch::PAGE_SIZE).unwrap(),
+                    Some(address.data()),
+                    Some(mapper.translate(address).unwrap().0.data()),
+                )),
+                klog_types::LogSource::Buddy,
+            );
+            let paddr = mapper.translate(address).unwrap().0;
+            let mut anon_vma_guard = page_manager_lock_irqsave();
+            let page = anon_vma_guard.get_mut(&paddr);
+            page.insert_vma(vma.clone());
+            VmFaultReason::VM_FAULT_COMPLETED
+        } else {
+            VmFaultReason::VM_FAULT_OOM
+        }
+    }
+
+    /// 处理文件映射页的缺页异常
+    /// ## 参数
+    ///
+    /// - `pfm`: 缺页异常信息
+    /// - `mapper`: 页表映射器
+    ///
+    /// ## 返回值
+    /// - VmFaultReason: 页面错误处理信息标志
+    #[allow(unused_variables)]
+    pub unsafe fn do_fault(pfm: PageFaultMessage, mapper: &mut PageMapper) -> VmFaultReason {
+        panic!(
+            "do_fault has not yet been implemented, 
+        fault message: {:?}, 
+        pid: {}\n",
+            pfm,
+            crate::process::ProcessManager::current_pid().data()
+        );
+        // TODO https://code.dragonos.org.cn/xref/linux-6.6.21/mm/memory.c#do_fault
+    }
+
+    /// 处理私有文件映射的写时复制
+    /// ## 参数
+    ///
+    /// - `pfm`: 缺页异常信息
+    /// - `mapper`: 页表映射器
+    ///
+    /// ## 返回值
+    /// - VmFaultReason: 页面错误处理信息标志
+    #[allow(dead_code, unused_variables)]
+    pub unsafe fn do_cow_fault(pfm: PageFaultMessage, mapper: &mut PageMapper) -> VmFaultReason {
+        panic!(
+            "do_cow_fault has not yet been implemented, 
+        fault message: {:?}, 
+        pid: {}\n",
+            pfm,
+            crate::process::ProcessManager::current_pid().data()
+        );
+        // TODO https://code.dragonos.org.cn/xref/linux-6.6.21/mm/memory.c#do_cow_fault
+    }
+
+    /// 处理文件映射页的读缺页异常
+    /// ## 参数
+    ///
+    /// - `pfm`: 缺页异常信息
+    /// - `mapper`: 页表映射器
+    ///
+    /// ## 返回值
+    /// - VmFaultReason: 页面错误处理信息标志
+    #[allow(dead_code, unused_variables)]
+    pub unsafe fn do_read_fault(pfm: PageFaultMessage, mapper: &mut PageMapper) -> VmFaultReason {
+        panic!(
+            "do_read_fault has not yet been implemented, 
+        fault message: {:?}, 
+        pid: {}\n",
+            pfm,
+            crate::process::ProcessManager::current_pid().data()
+        );
+        // TODO https://code.dragonos.org.cn/xref/linux-6.6.21/mm/memory.c#do_read_fault
+    }
+
+    /// 处理对共享文件映射区写入引起的缺页
+    /// ## 参数
+    ///
+    /// - `pfm`: 缺页异常信息
+    /// - `mapper`: 页表映射器
+    ///
+    /// ## 返回值
+    /// - VmFaultReason: 页面错误处理信息标志
+    #[allow(dead_code, unused_variables)]
+    pub unsafe fn do_shared_fault(pfm: PageFaultMessage, mapper: &mut PageMapper) -> VmFaultReason {
+        panic!(
+            "do_shared_fault has not yet been implemented, 
+        fault message: {:?}, 
+        pid: {}\n",
+            pfm,
+            crate::process::ProcessManager::current_pid().data()
+        );
+        // TODO https://code.dragonos.org.cn/xref/linux-6.6.21/mm/memory.c#do_shared_fault
+    }
+
+    /// 处理被置换页面的缺页异常
+    /// ## 参数
+    ///
+    /// - `pfm`: 缺页异常信息
+    /// - `mapper`: 页表映射器
+    ///
+    /// ## 返回值
+    /// - VmFaultReason: 页面错误处理信息标志
+    #[allow(unused_variables)]
+    pub unsafe fn do_swap_page(pfm: PageFaultMessage, mapper: &mut PageMapper) -> VmFaultReason {
+        panic!(
+            "do_swap_page has not yet been implemented, 
+        fault message: {:?}, 
+        pid: {}\n",
+            pfm,
+            crate::process::ProcessManager::current_pid().data()
+        );
+        // TODO https://code.dragonos.org.cn/xref/linux-6.6.21/mm/memory.c#do_swap_page
+    }
+
+    /// 处理NUMA的缺页异常
+    /// ## 参数
+    ///
+    /// - `pfm`: 缺页异常信息
+    /// - `mapper`: 页表映射器
+    ///
+    /// ## 返回值
+    /// - VmFaultReason: 页面错误处理信息标志
+    #[allow(unused_variables)]
+    pub unsafe fn do_numa_page(pfm: PageFaultMessage, mapper: &mut PageMapper) -> VmFaultReason {
+        panic!(
+            "do_numa_page has not yet been implemented, 
+        fault message: {:?}, 
+        pid: {}\n",
+            pfm,
+            crate::process::ProcessManager::current_pid().data()
+        );
+        // TODO https://code.dragonos.org.cn/xref/linux-6.6.21/mm/memory.c#do_numa_page
+    }
+
+    /// 处理写保护页面的写保护异常
+    /// ## 参数
+    ///
+    /// - `pfm`: 缺页异常信息
+    /// - `mapper`: 页表映射器
+    ///
+    /// ## 返回值
+    /// - VmFaultReason: 页面错误处理信息标志
+    pub unsafe fn do_wp_page(pfm: PageFaultMessage, mapper: &mut PageMapper) -> VmFaultReason {
+        let address = pfm.address_aligned_down();
+        let vma = pfm.vma.clone();
+        let old_paddr = mapper.translate(address).unwrap().0;
+        let mut page_manager = page_manager_lock_irqsave();
+        let map_count = page_manager.get_mut(&old_paddr).map_count();
+        drop(page_manager);
+
+        let mut entry = mapper.get_entry(address, 0).unwrap();
+        let new_flags = entry.flags().set_write(true);
+
+        if map_count == 1 {
+            let table = mapper.get_table(address, 0).unwrap();
+            let i = table.index_of(address).unwrap();
+            entry.set_flags(new_flags);
+            table.set_entry(i, entry);
+            VmFaultReason::VM_FAULT_COMPLETED
+        } else if let Some(flush) = mapper.map(address, new_flags) {
+            let mut page_manager = page_manager_lock_irqsave();
+            let old_page = page_manager.get_mut(&old_paddr);
+            old_page.remove_vma(&vma);
+            drop(page_manager);
+
+            flush.flush();
+            let paddr = mapper.translate(address).unwrap().0;
+            let mut anon_vma_guard = page_manager_lock_irqsave();
+            let page = anon_vma_guard.get_mut(&paddr);
+            page.insert_vma(vma.clone());
+
+            (MMArch::phys_2_virt(paddr).unwrap().data() as *mut u8).copy_from_nonoverlapping(
+                MMArch::phys_2_virt(old_paddr).unwrap().data() as *mut u8,
+                MMArch::PAGE_SIZE,
+            );
+
+            VmFaultReason::VM_FAULT_COMPLETED
+        } else {
+            VmFaultReason::VM_FAULT_OOM
+        }
+    }
+}

+ 84 - 0
kernel/src/mm/madvise.rs

@@ -0,0 +1,84 @@
+use system_error::SystemError;
+
+use crate::arch::{mm::PageMapper, MMArch};
+
+use super::{page::Flusher, syscall::MadvFlags, ucontext::LockedVMA, VmFlags};
+
+impl LockedVMA {
+    pub fn do_madvise(
+        &self,
+        behavior: MadvFlags,
+        _mapper: &mut PageMapper,
+        _flusher: impl Flusher<MMArch>,
+    ) -> Result<(), SystemError> {
+        //TODO https://code.dragonos.org.cn/xref/linux-6.6.21/mm/madvise.c?fi=madvise#do_madvise
+        let mut vma = self.lock();
+        let mut new_flags = *vma.vm_flags();
+        match behavior {
+            MadvFlags::MADV_REMOVE => {
+                // TODO
+            }
+
+            MadvFlags::MADV_WILLNEED => {
+                // TODO
+            }
+
+            MadvFlags::MADV_COLD => {
+                // TODO
+            }
+
+            MadvFlags::MADV_PAGEOUT => {
+                // TODO
+            }
+
+            MadvFlags::MADV_FREE => {
+                // TODO
+            }
+
+            MadvFlags::MADV_POPULATE_READ | MadvFlags::MADV_POPULATE_WRITE => {
+                // TODO
+            }
+
+            MadvFlags::MADV_NORMAL => {
+                new_flags = new_flags & !VmFlags::VM_RAND_READ & !VmFlags::VM_SEQ_READ
+            }
+
+            MadvFlags::MADV_SEQUENTIAL => {
+                new_flags = (new_flags & !VmFlags::VM_RAND_READ) | VmFlags::VM_SEQ_READ
+            }
+            MadvFlags::MADV_RANDOM => {
+                new_flags = (new_flags & !VmFlags::VM_SEQ_READ) | VmFlags::VM_RAND_READ
+            }
+
+            MadvFlags::MADV_DONTFORK => new_flags |= VmFlags::VM_DONTCOPY,
+
+            MadvFlags::MADV_DOFORK => {
+                if vma.vm_flags().contains(VmFlags::VM_IO) {
+                    return Err(SystemError::EINVAL);
+                }
+                new_flags &= !VmFlags::VM_DONTCOPY;
+            }
+
+            MadvFlags::MADV_WIPEONFORK => {
+                //MADV_WIPEONFORK仅支持匿名映射,后续实现其他映射方式后要在此处添加判断条件
+                new_flags |= VmFlags::VM_WIPEONFORK;
+            }
+
+            MadvFlags::MADV_KEEPONFORK => new_flags &= !VmFlags::VM_WIPEONFORK,
+
+            MadvFlags::MADV_DONTDUMP => new_flags |= VmFlags::VM_DONTDUMP,
+
+            //MADV_DODUMP不支持巨页映射,后续需要添加判断条件
+            MadvFlags::MADV_DODUMP => new_flags &= !VmFlags::VM_DONTDUMP,
+
+            MadvFlags::MADV_MERGEABLE | MadvFlags::MADV_UNMERGEABLE => {}
+
+            MadvFlags::MADV_HUGEPAGE | MadvFlags::MADV_NOHUGEPAGE => {}
+
+            MadvFlags::MADV_COLLAPSE => {}
+            _ => {}
+        }
+        vma.set_vm_flags(new_flags);
+        Ok(())
+    }
+}

+ 53 - 2
kernel/src/mm/mod.rs

@@ -16,14 +16,16 @@ use self::{
     allocator::page_frame::{VirtPageFrame, VirtPageFrameIter},
     memblock::MemoryAreaAttr,
     page::round_up_to_page_size,
-    ucontext::{AddressSpace, UserMapper},
+    ucontext::{AddressSpace, LockedVMA, UserMapper},
 };
 
 pub mod allocator;
 pub mod c_adapter;
 pub mod early_ioremap;
+pub mod fault;
 pub mod init;
 pub mod kernel_mapper;
+pub mod madvise;
 pub mod memblock;
 pub mod mmio_buddy;
 pub mod no_init;
@@ -38,7 +40,7 @@ static mut __IDLE_PROCESS_ADDRESS_SPACE: Option<Arc<AddressSpace>> = None;
 bitflags! {
     /// Virtual memory flags
     #[allow(clippy::bad_bit_mask)]
-    pub struct VmFlags:u32{
+    pub struct VmFlags:u64{
         const VM_NONE = 0x00000000;
 
         const VM_READ = 0x00000001;
@@ -73,6 +75,25 @@ bitflags! {
         const VM_WIPEONFORK = 0x02000000;
         const VM_DONTDUMP = 0x04000000;
     }
+
+    /// 描述页面错误处理过程中发生的不同情况或结果
+        pub struct VmFaultReason:u32 {
+        const VM_FAULT_OOM = 0x000001;
+        const VM_FAULT_SIGBUS = 0x000002;
+        const VM_FAULT_MAJOR = 0x000004;
+        const VM_FAULT_WRITE = 0x000008;
+        const VM_FAULT_HWPOISON = 0x000010;
+        const VM_FAULT_HWPOISON_LARGE = 0x000020;
+        const VM_FAULT_SIGSEGV = 0x000040;
+        const VM_FAULT_NOPAGE = 0x000100;
+        const VM_FAULT_LOCKED = 0x000200;
+        const VM_FAULT_RETRY = 0x000400;
+        const VM_FAULT_FALLBACK = 0x000800;
+        const VM_FAULT_DONE_COW = 0x001000;
+        const VM_FAULT_NEEDDSYNC = 0x002000;
+        const VM_FAULT_COMPLETED = 0x004000;
+        const VM_FAULT_HINDEX_MASK = 0x0f0000;
+    }
 }
 
 /// 获取内核IDLE进程的用户地址空间结构体
@@ -407,6 +428,8 @@ impl Default for PhysMemoryArea {
 }
 
 pub trait MemoryManagementArch: Clone + Copy + Debug {
+    /// 是否支持缺页中断
+    const PAGE_FAULT_ENABLED: bool;
     /// 页面大小的shift(假如页面4K,那么这个值就是12,因为2^12=4096)
     const PAGE_SHIFT: usize;
     /// 每个页表的页表项数目。(以2^n次幂来表示)假如有512个页表项,那么这个值就是9
@@ -440,6 +463,10 @@ pub trait MemoryManagementArch: Clone + Copy + Debug {
     const ENTRY_FLAG_DIRTY: usize;
     /// 当该位为1时,代表这个页面被处理器访问过
     const ENTRY_FLAG_ACCESSED: usize;
+    /// 标记该页表项指向的页是否为大页
+    const ENTRY_FLAG_HUGE_PAGE: usize;
+    /// 当该位为1时,代表该页表项是全局的
+    const ENTRY_FLAG_GLOBAL: usize;
 
     /// 虚拟地址与物理地址的偏移量
     const PHYS_OFFSET: usize;
@@ -468,6 +495,9 @@ pub trait MemoryManagementArch: Clone + Copy + Debug {
     const PAGE_ENTRY_NUM: usize = 1 << Self::PAGE_ENTRY_SHIFT;
     /// 该字段用于根据虚拟地址,获取该虚拟地址在对应的页表中是第几个页表项
     const PAGE_ENTRY_MASK: usize = Self::PAGE_ENTRY_NUM - 1;
+    /// 内核页表在顶级页表的第一个页表项的索引
+    const PAGE_KERNEL_INDEX: usize = (Self::PHYS_OFFSET & Self::PAGE_ADDRESS_MASK)
+        >> (Self::PAGE_ADDRESS_SHIFT - Self::PAGE_ENTRY_SHIFT);
 
     const PAGE_NEGATIVE_MASK: usize = !((Self::PAGE_ADDRESS_SIZE) - 1);
 
@@ -589,6 +619,27 @@ pub trait MemoryManagementArch: Clone + Copy + Debug {
     ///
     /// 页表项的值
     fn make_entry(paddr: PhysAddr, page_flags: usize) -> usize;
+
+    /// 判断一个VMA是否允许访问
+    ///
+    /// ## 参数
+    ///
+    /// - `vma`: 进行判断的VMA
+    /// - `write`: 是否需要写入权限(true 表示需要写权限)
+    /// - `execute`: 是否需要执行权限(true 表示需要执行权限)
+    /// - `foreign`: 是否是外部的(即非当前进程的)VMA
+    ///
+    /// ## 返回值
+    /// - `true`: VMA允许访问
+    /// - `false`: VMA不允许访问
+    fn vma_access_permitted(
+        _vma: Arc<LockedVMA>,
+        _write: bool,
+        _execute: bool,
+        _foreign: bool,
+    ) -> bool {
+        true
+    }
 }
 
 /// @brief 虚拟地址范围

+ 282 - 10
kernel/src/mm/page.rs

@@ -13,12 +13,14 @@ use crate::{
     arch::{interrupt::ipi::send_ipi, MMArch},
     exception::ipi::{IpiKind, IpiTarget},
     ipc::shm::ShmId,
-    kerror, kwarn,
+    kerror,
     libs::spinlock::{SpinLock, SpinLockGuard},
 };
 
 use super::{
-    allocator::page_frame::FrameAllocator, syscall::ProtFlags, ucontext::LockedVMA,
+    allocator::page_frame::{FrameAllocator, PageFrameCount},
+    syscall::ProtFlags,
+    ucontext::LockedVMA,
     MemoryManagementArch, PageTableKind, PhysAddr, VirtAddr,
 };
 
@@ -70,7 +72,9 @@ impl PageManager {
     }
 
     pub fn get_mut(&mut self, paddr: &PhysAddr) -> &mut Page {
-        self.phys2page.get_mut(paddr).unwrap()
+        self.phys2page
+            .get_mut(paddr)
+            .unwrap_or_else(|| panic!("{:?}", paddr))
     }
 
     pub fn insert(&mut self, paddr: PhysAddr, page: Page) {
@@ -141,9 +145,15 @@ impl Page {
         self.free_when_zero = dealloc_when_zero;
     }
 
+    #[inline(always)]
     pub fn anon_vma(&self) -> &HashSet<Arc<LockedVMA>> {
         &self.anon_vma
     }
+
+    #[inline(always)]
+    pub fn map_count(&self) -> usize {
+        self.map_count
+    }
 }
 
 #[derive(Debug)]
@@ -265,7 +275,7 @@ impl<Arch: MemoryManagementArch> PageTable<Arch> {
     /// ## 返回值
     ///
     /// 页表项在页表中的下标。如果addr不在当前页表所表示的虚拟地址空间中,则返回None
-    pub unsafe fn index_of(&self, addr: VirtAddr) -> Option<usize> {
+    pub fn index_of(&self, addr: VirtAddr) -> Option<usize> {
         let addr = VirtAddr::new(addr.data() & Arch::PAGE_ADDRESS_MASK);
         let shift = self.level * Arch::PAGE_ENTRY_SHIFT + Arch::PAGE_SHIFT;
 
@@ -290,6 +300,61 @@ impl<Arch: MemoryManagementArch> PageTable<Arch> {
             self.level - 1,
         ));
     }
+
+    /// 拷贝页表
+    /// ## 参数
+    ///
+    /// - `allocator`: 物理页框分配器
+    /// - `copy_on_write`: 是否写时复制
+    pub unsafe fn clone(
+        &self,
+        allocator: &mut impl FrameAllocator,
+        copy_on_write: bool,
+    ) -> Option<PageTable<Arch>> {
+        // 分配新页面作为新的页表
+        let phys = allocator.allocate_one()?;
+        let frame = MMArch::phys_2_virt(phys).unwrap();
+        MMArch::write_bytes(frame, 0, MMArch::PAGE_SIZE);
+        let new_table = PageTable::new(self.base, phys, self.level);
+        if self.level == 0 {
+            for i in 0..Arch::PAGE_ENTRY_NUM {
+                if let Some(mut entry) = self.entry(i) {
+                    if entry.present() {
+                        if copy_on_write {
+                            let mut new_flags = entry.flags().set_write(false);
+                            entry.set_flags(new_flags);
+                            self.set_entry(i, entry);
+                            new_flags = new_flags.set_dirty(false);
+                            entry.set_flags(new_flags);
+                            new_table.set_entry(i, entry);
+                        } else {
+                            let phys = allocator.allocate_one()?;
+                            let mut anon_vma_guard = page_manager_lock_irqsave();
+                            anon_vma_guard.insert(phys, Page::new(false));
+                            let old_phys = entry.address().unwrap();
+                            let frame = MMArch::phys_2_virt(phys).unwrap().data() as *mut u8;
+                            frame.copy_from_nonoverlapping(
+                                MMArch::phys_2_virt(old_phys).unwrap().data() as *mut u8,
+                                MMArch::PAGE_SIZE,
+                            );
+                            new_table.set_entry(i, PageEntry::new(phys, entry.flags()));
+                        }
+                    }
+                }
+            }
+        } else {
+            // 非一级页表拷贝时,对每个页表项对应的页表都进行拷贝
+            for i in 0..MMArch::PAGE_ENTRY_NUM {
+                if let Some(next_table) = self.next_level_table(i) {
+                    let table = next_table.clone(allocator, copy_on_write)?;
+                    let old_entry = self.entry(i).unwrap();
+                    let entry = PageEntry::new(table.phys(), old_entry.flags());
+                    new_table.set_entry(i, entry);
+                }
+            }
+        }
+        Some(new_table)
+    }
 }
 
 /// 页表项
@@ -368,6 +433,22 @@ impl<Arch: MemoryManagementArch> PageEntry<Arch> {
     pub fn present(&self) -> bool {
         return self.data & Arch::ENTRY_FLAG_PRESENT != 0;
     }
+
+    #[inline(always)]
+    pub fn empty(&self) -> bool {
+        self.data & !(Arch::ENTRY_FLAG_DIRTY & Arch::ENTRY_FLAG_ACCESSED) == 0
+    }
+
+    #[inline(always)]
+    pub fn protnone(&self) -> bool {
+        return self.data & (Arch::ENTRY_FLAG_PRESENT | Arch::ENTRY_FLAG_GLOBAL)
+            == Arch::ENTRY_FLAG_GLOBAL;
+    }
+
+    #[inline(always)]
+    pub fn write(&self) -> bool {
+        return self.data & Arch::ENTRY_FLAG_READWRITE != 0;
+    }
 }
 
 /// 页表项的标志位
@@ -605,6 +686,36 @@ impl<Arch: MemoryManagementArch> PageFlags<Arch> {
         return self.has_flag(Arch::ENTRY_FLAG_WRITE_THROUGH);
     }
 
+    /// 设置当前页表是否为脏页
+    ///
+    /// ## 参数
+    ///
+    /// - value: 如果为true,那么将当前页表项标记为脏页。
+    #[inline(always)]
+    pub fn set_dirty(self, value: bool) -> Self {
+        return self.update_flags(Arch::ENTRY_FLAG_DIRTY, value);
+    }
+
+    /// 设置当前页表被访问
+    ///
+    /// ## 参数
+    ///
+    /// - value: 如果为true,那么将当前页表项的访问标志设置为已访问。
+    #[inline(always)]
+    pub fn set_access(self, value: bool) -> Self {
+        return self.update_flags(Arch::ENTRY_FLAG_ACCESSED, value);
+    }
+
+    /// 设置指向的页是否为大页
+    ///
+    /// ## 参数
+    ///
+    /// - value: 如果为true,那么将当前页表项标记为指向大页。
+    #[inline(always)]
+    pub fn set_huge_page(self, value: bool) -> Self {
+        return self.update_flags(Arch::ENTRY_FLAG_HUGE_PAGE, value);
+    }
+
     /// MMIO内存的页表项标志
     #[inline(always)]
     pub fn mmio_flags() -> Self {
@@ -758,12 +869,6 @@ impl<Arch: MemoryManagementArch, F: FrameAllocator> PageMapper<Arch, F> {
             let i = table.index_of(virt)?;
             assert!(i < Arch::PAGE_ENTRY_NUM);
             if table.level() == 0 {
-                // todo: 检查是否已经映射
-                // 现在不检查的原因是,刚刚启动系统时,内核会映射一些页。
-                if table.entry_mapped(i)? {
-                    kwarn!("Page {:?} already mapped", virt);
-                }
-
                 compiler_fence(Ordering::SeqCst);
 
                 table.set_entry(i, entry);
@@ -797,6 +902,173 @@ impl<Arch: MemoryManagementArch, F: FrameAllocator> PageMapper<Arch, F> {
         }
     }
 
+    /// 进行大页映射
+    pub unsafe fn map_huge_page(
+        &mut self,
+        virt: VirtAddr,
+        flags: PageFlags<Arch>,
+    ) -> Option<PageFlush<Arch>> {
+        // 验证虚拟地址是否对齐
+        if !(virt.check_aligned(Arch::PAGE_SIZE)) {
+            kerror!("Try to map unaligned page: virt={:?}", virt);
+            return None;
+        }
+
+        let virt = VirtAddr::new(virt.data() & (!Arch::PAGE_NEGATIVE_MASK));
+
+        let mut table = self.table();
+        loop {
+            let i = table.index_of(virt)?;
+            assert!(i < Arch::PAGE_ENTRY_NUM);
+            let next_table = table.next_level_table(i);
+            if let Some(next_table) = next_table {
+                table = next_table;
+            } else {
+                break;
+            }
+        }
+
+        // 支持2M、1G大页,即页表层级为1、2级的页表可以映射大页
+        if table.level == 0 || table.level > 2 {
+            return None;
+        }
+
+        let (phys, count) = self.frame_allocator.allocate(PageFrameCount::new(
+            Arch::PAGE_ENTRY_NUM.pow(table.level as u32),
+        ))?;
+
+        MMArch::write_bytes(
+            MMArch::phys_2_virt(phys).unwrap(),
+            0,
+            MMArch::PAGE_SIZE * count.data(),
+        );
+
+        table.set_entry(
+            table.index_of(virt)?,
+            PageEntry::new(phys, flags.set_huge_page(true)),
+        )?;
+        Some(PageFlush::new(virt))
+    }
+
+    /// 为虚拟地址分配指定层级的页表
+    /// ## 参数
+    ///
+    /// - `virt`: 虚拟地址
+    /// - `level`: 指定页表层级
+    ///
+    /// ## 返回值
+    /// - Some(PageTable<Arch>): 虚拟地址对应层级的页表
+    /// - None: 对应页表不存在
+    pub unsafe fn allocate_table(
+        &mut self,
+        virt: VirtAddr,
+        level: usize,
+    ) -> Option<PageTable<Arch>> {
+        let table = self.get_table(virt, level + 1)?;
+        let i = table.index_of(virt)?;
+        let frame = self.frame_allocator.allocate_one()?;
+
+        // 清空这个页帧
+        MMArch::write_bytes(MMArch::phys_2_virt(frame).unwrap(), 0, MMArch::PAGE_SIZE);
+
+        // 设置页表项的flags
+        let flags: PageFlags<Arch> = PageFlags::new_page_table(virt.kind() == PageTableKind::User);
+
+        table.set_entry(i, PageEntry::new(frame, flags));
+        table.next_level_table(i)
+    }
+
+    /// 获取虚拟地址的指定层级页表
+    /// ## 参数
+    ///
+    /// - `virt`: 虚拟地址
+    /// - `level`: 指定页表层级
+    ///
+    /// ## 返回值
+    /// - Some(PageTable<Arch>): 虚拟地址对应层级的页表
+    /// - None: 对应页表不存在
+    pub fn get_table(&self, virt: VirtAddr, level: usize) -> Option<PageTable<Arch>> {
+        let mut table = self.table();
+        if level > Arch::PAGE_LEVELS - 1 {
+            return None;
+        }
+
+        unsafe {
+            loop {
+                if table.level == level {
+                    return Some(table);
+                }
+                let i = table.index_of(virt)?;
+                assert!(i < Arch::PAGE_ENTRY_NUM);
+
+                table = table.next_level_table(i)?;
+            }
+        }
+    }
+
+    /// 获取虚拟地址在指定层级页表的PageEntry
+    /// ## 参数
+    ///
+    /// - `virt`: 虚拟地址
+    /// - `level`: 指定页表层级
+    ///
+    /// ## 返回值
+    /// - Some(PageEntry<Arch>): 虚拟地址在指定层级的页表的有效PageEntry
+    /// - None: 无对应的有效PageEntry
+    pub fn get_entry(&self, virt: VirtAddr, level: usize) -> Option<PageEntry<Arch>> {
+        let table = self.get_table(virt, level)?;
+        let i = table.index_of(virt)?;
+        let entry = unsafe { table.entry(i) }?;
+
+        if !entry.empty() {
+            Some(entry)
+        } else {
+            None
+        }
+
+        // let mut table = self.table();
+        // if level > Arch::PAGE_LEVELS - 1 {
+        //     return None;
+        // }
+        // unsafe {
+        //     loop {
+        //         let i = table.index_of(virt)?;
+        //         assert!(i < Arch::PAGE_ENTRY_NUM);
+
+        //         if table.level == level {
+        //             let entry = table.entry(i)?;
+        //             if !entry.empty() {
+        //                 return Some(entry);
+        //             } else {
+        //                 return None;
+        //             }
+        //         }
+
+        //         table = table.next_level_table(i)?;
+        //     }
+        // }
+    }
+
+    /// 拷贝用户空间映射
+    /// ## 参数
+    ///
+    /// - `umapper`: 要拷贝的用户空间
+    /// - `copy_on_write`: 是否写时复制
+    pub unsafe fn clone_user_mapping(&mut self, umapper: &mut Self, copy_on_write: bool) {
+        let old_table = umapper.table();
+        let new_table = self.table();
+        let allocator = self.allocator_mut();
+        // 顶级页表的[0, PAGE_KERNEL_INDEX)项为用户空间映射
+        for entry_index in 0..Arch::PAGE_KERNEL_INDEX {
+            if let Some(next_table) = old_table.next_level_table(entry_index) {
+                let table = next_table.clone(allocator, copy_on_write).unwrap();
+                let old_entry = old_table.entry(entry_index).unwrap();
+                let entry = PageEntry::new(table.phys(), old_entry.flags());
+                new_table.set_entry(entry_index, entry);
+            }
+        }
+    }
+
     /// 将物理地址映射到具有线性偏移量的虚拟地址
     #[allow(dead_code)]
     pub unsafe fn map_linearly(

+ 101 - 0
kernel/src/mm/syscall.rs

@@ -72,6 +72,70 @@ bitflags! {
         const MREMAP_FIXED = 2;
         const MREMAP_DONTUNMAP = 4;
     }
+
+
+    pub struct MadvFlags: u64 {
+        /// 默认行为,系统会进行一定的预读和预写,适用于一般读取场景
+        const MADV_NORMAL = 0;
+        /// 随机访问模式,系统会尽量最小化数据读取量,适用于随机访问的场景
+        const MADV_RANDOM = 1;
+        /// 顺序访问模式,系统会进行积极的预读,访问后的页面可以尽快释放,适用于顺序读取场景
+        const MADV_SEQUENTIAL = 2;
+        /// 通知系统预读某些页面,用于应用程序提前准备数据
+        const MADV_WILLNEED = 3;
+        /// 通知系统应用程序不再需要某些页面,内核可以释放相关资源
+        const MADV_DONTNEED = 4;
+
+        /// 将指定范围的页面标记为延迟释放,真正的释放会延迟至内存压力发生时
+        const MADV_FREE = 8;
+        /// 应用程序请求释放指定范围的页面和相关的后备存储
+        const MADV_REMOVE = 9;
+        /// 在 fork 时排除指定区域
+        const MADV_DONTFORK = 10;
+        /// 取消 MADV_DONTFORK 的效果,不再在 fork 时排除指定区域
+        const MADV_DOFORK = 11;
+        /// 模拟内存硬件错误,触发内存错误处理器处理
+        const MADV_HWPOISON = 100;
+        /// 尝试软下线指定的内存范围
+        const MADV_SOFT_OFFLINE = 101;
+
+        /// 应用程序建议内核尝试合并指定范围内内容相同的页面
+        const MADV_MERGEABLE = 12;
+        /// 取消 MADV_MERGEABLE 的效果,不再合并页面
+        const MADV_UNMERGEABLE = 13;
+
+        /// 应用程序希望将指定范围以透明大页方式支持
+        const MADV_HUGEPAGE = 14;
+        /// 将指定范围标记为不值得用透明大页支持
+        const MADV_NOHUGEPAGE = 15;
+
+        /// 应用程序请求在核心转储时排除指定范围内的页面
+        const MADV_DONTDUMP = 16;
+        /// 取消 MADV_DONTDUMP 的效果,不再排除核心转储时的页面
+        const MADV_DODUMP = 17;
+
+        /// 在 fork 时将子进程的该区域内存填充为零
+        const MADV_WIPEONFORK = 18;
+        /// 取消 `MADV_WIPEONFORK` 的效果,不再在 fork 时填充子进程的内存
+        const MADV_KEEPONFORK = 19;
+
+        /// 应用程序不会立刻使用这些内存,内核将页面设置为非活动状态以便在内存压力发生时轻松回收
+        const MADV_COLD = 20;
+        /// 应用程序不会立刻使用这些内存,内核立即将这些页面换出
+        const MADV_PAGEOUT = 21;
+
+        /// 预先填充页面表,可读,通过触发读取故障
+        const MADV_POPULATE_READ = 22;
+        /// 预先填充页面表,可写,通过触发写入故障
+        const MADV_POPULATE_WRITE = 23;
+
+        /// 与 `MADV_DONTNEED` 类似,会将被锁定的页面释放
+        const MADV_DONTNEED_LOCKED = 24;
+
+        /// 同步将页面合并为新的透明大页
+        const MADV_COLLAPSE = 25;
+
+    }
 }
 
 impl From<MapFlags> for VmFlags {
@@ -265,6 +329,7 @@ impl Syscall {
             prot_flags,
             map_flags,
             true,
+            true,
         )?;
         return Ok(start_page.virt_address().data());
     }
@@ -423,4 +488,40 @@ impl Syscall {
             .map_err(|_| SystemError::EINVAL)?;
         return Ok(0);
     }
+
+    /// ## madvise系统调用
+    ///
+    /// ## 参数
+    ///
+    /// - `start_vaddr`:起始地址(已经对齐到页)
+    /// - `len`:长度(已经对齐到页)
+    /// - `madv_flags`:建议标志
+    pub fn madvise(
+        start_vaddr: VirtAddr,
+        len: usize,
+        madv_flags: usize,
+    ) -> Result<usize, SystemError> {
+        if !start_vaddr.check_aligned(MMArch::PAGE_SIZE) || !check_aligned(len, MMArch::PAGE_SIZE) {
+            return Err(SystemError::EINVAL);
+        }
+
+        if unlikely(verify_area(start_vaddr, len).is_err()) {
+            return Err(SystemError::EINVAL);
+        }
+        if unlikely(len == 0) {
+            return Err(SystemError::EINVAL);
+        }
+
+        let madv_flags = MadvFlags::from_bits(madv_flags as u64).ok_or(SystemError::EINVAL)?;
+
+        let current_address_space: Arc<AddressSpace> = AddressSpace::current()?;
+        let start_frame = VirtPageFrame::new(start_vaddr);
+        let page_count = PageFrameCount::new(len / MMArch::PAGE_SIZE);
+
+        current_address_space
+            .write()
+            .madvise(start_frame, page_count, madv_flags)
+            .map_err(|_| SystemError::EINVAL)?;
+        return Ok(0);
+    }
 }

+ 222 - 70
kernel/src/mm/ucontext.rs

@@ -22,7 +22,7 @@ use crate::{
     exception::InterruptArch,
     libs::{
         align::page_align_up,
-        rwlock::{RwLock, RwLockWriteGuard},
+        rwlock::RwLock,
         spinlock::{SpinLock, SpinLockGuard},
     },
     mm::page::page_manager_lock_irqsave,
@@ -35,7 +35,7 @@ use super::{
         deallocate_page_frames, PageFrameCount, PhysPageFrame, VirtPageFrame, VirtPageFrameIter,
     },
     page::{Flusher, InactiveFlusher, PageFlags, PageFlushAll},
-    syscall::{MapFlags, MremapFlags, ProtFlags},
+    syscall::{MadvFlags, MapFlags, MremapFlags, ProtFlags},
     MemoryManagementArch, PageTableKind, VirtAddr, VirtRegion, VmFlags,
 };
 
@@ -160,6 +160,11 @@ impl InnerAddressSpace {
         let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
         let new_addr_space = AddressSpace::new(false)?;
         let mut new_guard = new_addr_space.write();
+        unsafe {
+            new_guard
+                .user_mapper
+                .clone_from(&mut self.user_mapper, MMArch::PAGE_FAULT_ENABLED)
+        };
 
         // 拷贝用户栈的结构体信息,但是不拷贝用户栈的内容(因为后面VMA的拷贝会拷贝用户栈的内容)
         unsafe {
@@ -167,8 +172,6 @@ impl InnerAddressSpace {
         }
         let _current_stack_size = self.user_stack.as_ref().unwrap().stack_size();
 
-        let current_mapper = &mut self.user_mapper.utable;
-
         // 拷贝空洞
         new_guard.mappings.vm_holes = self.mappings.vm_holes.clone();
 
@@ -176,55 +179,23 @@ impl InnerAddressSpace {
             // TODO: 增加对VMA是否为文件映射的判断,如果是的话,就跳过
 
             let vma_guard: SpinLockGuard<'_, VMA> = vma.lock();
-            let old_flags = vma_guard.flags();
-            let tmp_flags: PageFlags<MMArch> = PageFlags::new().set_write(true);
-
-            // 分配内存页并创建新的VMA
-            let new_vma = VMA::zeroed(
-                VirtPageFrame::new(vma_guard.region.start()),
-                PageFrameCount::new(vma_guard.region.size() / MMArch::PAGE_SIZE),
-                *vma_guard.vm_flags(),
-                tmp_flags,
-                &mut new_guard.user_mapper.utable,
-                (),
-            )?;
+
+            // 仅拷贝VMA信息并添加反向映射,因为UserMapper克隆时已经分配了新的物理页
+            let new_vma = LockedVMA::new(vma_guard.clone_info_only());
             new_guard.mappings.vmas.insert(new_vma.clone());
             // kdebug!("new vma: {:x?}", new_vma);
-            let mut new_vma_guard = new_vma.lock();
+            let new_vma_guard = new_vma.lock();
+            let new_mapper = &new_guard.user_mapper.utable;
+            let mut anon_vma_guard = page_manager_lock_irqsave();
             for page in new_vma_guard.pages().map(|p| p.virt_address()) {
-                // kdebug!("page: {:x?}", page);
-                let current_frame = unsafe {
-                    MMArch::phys_2_virt(
-                        current_mapper
-                            .translate(page)
-                            .expect("VMA page not mapped")
-                            .0,
-                    )
-                }
-                .expect("Phys2Virt: vaddr overflow.")
-                .data() as *mut u8;
-
-                let new_frame = unsafe {
-                    MMArch::phys_2_virt(
-                        new_guard
-                            .user_mapper
-                            .utable
-                            .translate(page)
-                            .expect("VMA page not mapped")
-                            .0,
-                    )
-                }
-                .expect("Phys2Virt: vaddr overflow.")
-                .data() as *mut u8;
-
-                unsafe {
-                    // 拷贝数据
-                    new_frame.copy_from_nonoverlapping(current_frame, MMArch::PAGE_SIZE);
+                if let Some((paddr, _)) = new_mapper.translate(page) {
+                    let page = anon_vma_guard.get_mut(&paddr);
+                    page.insert_vma(new_vma.clone());
                 }
             }
-            drop(vma_guard);
 
-            new_vma_guard.remap(old_flags, &mut new_guard.user_mapper.utable, ())?;
+            drop(anon_vma_guard);
+            drop(vma_guard);
             drop(new_vma_guard);
         }
         drop(new_guard);
@@ -232,6 +203,24 @@ impl InnerAddressSpace {
         return Ok(new_addr_space);
     }
 
+    /// 拓展用户栈
+    /// ## 参数
+    ///
+    /// - `bytes`: 拓展大小
+    #[allow(dead_code)]
+    pub fn extend_stack(&mut self, mut bytes: usize) -> Result<(), SystemError> {
+        // kdebug!("extend user stack");
+        let prot_flags = ProtFlags::PROT_READ | ProtFlags::PROT_WRITE | ProtFlags::PROT_EXEC;
+        let map_flags = MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS | MapFlags::MAP_GROWSDOWN;
+        let stack = self.user_stack.as_mut().unwrap();
+
+        bytes = page_align_up(bytes);
+        stack.mapped_size += bytes;
+        let len = stack.stack_bottom - stack.mapped_size;
+        self.map_anonymous(len, bytes, prot_flags, map_flags, false, false)?;
+        return Ok(());
+    }
+
     /// 判断当前的地址空间是否是当前进程的地址空间
     #[inline]
     pub fn is_current(&self) -> bool {
@@ -247,6 +236,7 @@ impl InnerAddressSpace {
     /// - `prot_flags`:保护标志
     /// - `map_flags`:映射标志
     /// - `round_to_min`:是否将`start_vaddr`对齐到`mmap_min`,如果为`true`,则当`start_vaddr`不为0时,会对齐到`mmap_min`,否则仅向下对齐到页边界
+    /// - `allocate_at_once`:是否立即分配物理空间
     ///
     /// ## 返回
     ///
@@ -258,7 +248,13 @@ impl InnerAddressSpace {
         prot_flags: ProtFlags,
         map_flags: MapFlags,
         round_to_min: bool,
+        allocate_at_once: bool,
     ) -> Result<VirtPageFrame, SystemError> {
+        let allocate_at_once = if MMArch::PAGE_FAULT_ENABLED {
+            allocate_at_once
+        } else {
+            true
+        };
         // 用于对齐hint的函数
         let round_hint_to_min = |hint: VirtAddr| {
             // 先把hint向下对齐到页边界
@@ -286,15 +282,38 @@ impl InnerAddressSpace {
 
         // kdebug!("map_anonymous: len = {}", len);
 
-        let start_page: VirtPageFrame = self.mmap(
-            round_hint_to_min(start_vaddr),
-            PageFrameCount::from_bytes(len).unwrap(),
-            prot_flags,
-            map_flags,
-            move |page, count, flags, mapper, flusher| {
-                VMA::zeroed(page, count, vm_flags, flags, mapper, flusher)
-            },
-        )?;
+        let start_page: VirtPageFrame = if allocate_at_once {
+            self.mmap(
+                round_hint_to_min(start_vaddr),
+                PageFrameCount::from_bytes(len).unwrap(),
+                prot_flags,
+                map_flags,
+                move |page, count, flags, mapper, flusher| {
+                    VMA::zeroed(page, count, vm_flags, flags, mapper, flusher)
+                },
+            )?
+        } else {
+            self.mmap(
+                round_hint_to_min(start_vaddr),
+                PageFrameCount::from_bytes(len).unwrap(),
+                prot_flags,
+                map_flags,
+                move |page, count, flags, _mapper, _flusher| {
+                    Ok(LockedVMA::new(VMA {
+                        region: VirtRegion::new(
+                            page.virt_address(),
+                            count.data() * MMArch::PAGE_SIZE,
+                        ),
+                        vm_flags,
+                        flags,
+                        mapped: true,
+                        user_address_space: None,
+                        self_ref: Weak::default(),
+                        provider: Provider::Allocated,
+                    }))
+                },
+            )?
+        };
 
         return Ok(start_page);
     }
@@ -428,7 +447,7 @@ impl InnerAddressSpace {
         }
 
         // 获取映射后的新内存页面
-        let new_page = self.map_anonymous(new_vaddr, new_len, prot_flags, map_flags, true)?;
+        let new_page = self.map_anonymous(new_vaddr, new_len, prot_flags, map_flags, true, true)?;
         let new_page_vaddr = new_page.virt_address();
 
         // 拷贝旧内存区域内容到新内存区域
@@ -556,6 +575,47 @@ impl InnerAddressSpace {
         return Ok(());
     }
 
+    pub fn madvise(
+        &mut self,
+        start_page: VirtPageFrame,
+        page_count: PageFrameCount,
+        behavior: MadvFlags,
+    ) -> Result<(), SystemError> {
+        let (mut active, mut inactive);
+        let mut flusher = if self.is_current() {
+            active = PageFlushAll::new();
+            &mut active as &mut dyn Flusher<MMArch>
+        } else {
+            inactive = InactiveFlusher::new();
+            &mut inactive as &mut dyn Flusher<MMArch>
+        };
+
+        let mapper = &mut self.user_mapper.utable;
+
+        let region = VirtRegion::new(start_page.virt_address(), page_count.bytes());
+        let regions = self.mappings.conflicts(region).collect::<Vec<_>>();
+
+        for r in regions {
+            let r = *r.lock().region();
+            let r = self.mappings.remove_vma(&r).unwrap();
+
+            let intersection = r.lock().region().intersect(&region).unwrap();
+            let split_result = r
+                .extract(intersection, mapper)
+                .expect("Failed to extract VMA");
+
+            if let Some(before) = split_result.prev {
+                self.mappings.insert_vma(before);
+            }
+            if let Some(after) = split_result.after {
+                self.mappings.insert_vma(after);
+            }
+            r.do_madvise(behavior, mapper, &mut flusher)?;
+            self.mappings.insert_vma(r);
+        }
+        Ok(())
+    }
+
     /// 创建新的用户栈
     ///
     /// ## 参数
@@ -605,7 +665,7 @@ impl InnerAddressSpace {
             let len = new_brk - self.brk;
             let prot_flags = ProtFlags::PROT_READ | ProtFlags::PROT_WRITE | ProtFlags::PROT_EXEC;
             let map_flags = MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS | MapFlags::MAP_FIXED;
-            self.map_anonymous(old_brk, len, prot_flags, map_flags, true)?;
+            self.map_anonymous(old_brk, len, prot_flags, map_flags, true, false)?;
 
             self.brk = new_brk;
             return Ok(old_brk);
@@ -658,6 +718,16 @@ impl UserMapper {
     pub fn new(utable: PageMapper) -> Self {
         return Self { utable };
     }
+
+    /// 拷贝用户空间映射
+    /// ## 参数
+    ///
+    /// - `umapper`: 要拷贝的用户空间
+    /// - `copy_on_write`: 是否写时复制
+    pub unsafe fn clone_from(&mut self, umapper: &mut Self, copy_on_write: bool) {
+        self.utable
+            .clone_user_mapping(&mut umapper.utable, copy_on_write);
+    }
 }
 
 impl Drop for UserMapper {
@@ -710,6 +780,35 @@ impl UserMappings {
         return None;
     }
 
+    /// 向下寻找距离虚拟地址最近的VMA
+    /// ## 参数
+    ///
+    /// - `vaddr`: 虚拟地址
+    ///
+    /// ## 返回值
+    /// - Some(Arc<LockedVMA>): 虚拟地址所在的或最近的下一个VMA
+    /// - None: 未找到VMA
+    #[allow(dead_code)]
+    pub fn find_nearest(&self, vaddr: VirtAddr) -> Option<Arc<LockedVMA>> {
+        let mut nearest: Option<Arc<LockedVMA>> = None;
+        for v in self.vmas.iter() {
+            let guard = v.lock();
+            if guard.region.contains(vaddr) {
+                return Some(v.clone());
+            }
+            if guard.region.start > vaddr
+                && if let Some(ref nearest) = nearest {
+                    guard.region.start < nearest.lock().region.start
+                } else {
+                    true
+                }
+            {
+                nearest = Some(v.clone());
+            }
+        }
+        return nearest;
+    }
+
     /// 获取当前进程的地址空间中,与给定虚拟地址范围有重叠的VMA的迭代器。
     pub fn conflicts(&self, request: VirtRegion) -> impl Iterator<Item = Arc<LockedVMA>> + '_ {
         let r = self
@@ -959,6 +1058,9 @@ impl LockedVMA {
         let mut page_manager_guard: SpinLockGuard<'_, crate::mm::page::PageManager> =
             page_manager_lock_irqsave();
         for page in guard.region.pages() {
+            if mapper.translate(page.virt_address()).is_none() {
+                continue;
+            }
             let (paddr, _, flush) = unsafe { mapper.unmap_phys(page.virt_address(), true) }
+                .expect("Failed to unmap, because of some page is not mapped");
 
@@ -1065,6 +1167,39 @@ impl LockedVMA {
             after,
         ));
     }
+
+    /// 判断VMA是否为外部(非当前进程空间)的VMA — NOTE(review): when the weak upgrade succeeds this returns `is_current(&space)`, which looks inverted for an "is_foreign" predicate; confirm intended semantics
+    pub fn is_foreign(&self) -> bool {
+        let guard = self.lock();
+        if let Some(space) = guard.user_address_space.clone() {
+            if let Some(space) = space.upgrade() {
+                return AddressSpace::is_current(&space);
+            } else {
+                return true;
+            }
+        } else {
+            return true;
+        }
+    }
+
+    /// 判断VMA是否可访问
+    pub fn is_accessible(&self) -> bool {
+        let guard = self.lock();
+        let vm_access_flags: VmFlags = VmFlags::VM_READ | VmFlags::VM_WRITE | VmFlags::VM_EXEC;
+        guard.vm_flags().intersects(vm_access_flags)
+    }
+
+    /// 判断VMA是否为匿名映射
+    pub fn is_anonymous(&self) -> bool {
+        //TODO: 实现匿名映射判断逻辑,目前仅支持匿名映射
+        true
+    }
+
+    /// 判断VMA是否为大页映射
+    pub fn is_hugepage(&self) -> bool {
+        //TODO: 实现巨页映射判断逻辑,目前不支持巨页映射
+        false
+    }
 }
 
 impl Drop for LockedVMA {
@@ -1182,6 +1317,18 @@ impl VMA {
         };
     }
 
+    pub fn clone_info_only(&self) -> Self {
+        return Self {
+            region: self.region,
+            vm_flags: self.vm_flags,
+            flags: self.flags,
+            mapped: self.mapped,
+            user_address_space: None,
+            self_ref: Weak::default(),
+            provider: Provider::Allocated,
+        };
+    }
+
     #[inline(always)]
     pub fn flags(&self) -> PageFlags<MMArch> {
         return self.flags;
@@ -1203,15 +1350,15 @@ impl VMA {
         assert!(self.mapped);
         for page in self.region.pages() {
             // kdebug!("remap page {:?}", page.virt_address());
-            // 暂时要求所有的页帧都已经映射到页表
-            // TODO: 引入Lazy Mapping, 通过缺页中断来映射页帧,这里就不必要求所有的页帧都已经映射到页表了
-            let r = unsafe {
-                mapper
-                    .remap(page.virt_address(), flags)
-                    .expect("Failed to remap, beacuse of some page is not mapped")
-            };
+            if mapper.translate(page.virt_address()).is_some() {
+                let r = unsafe {
+                    mapper
+                        .remap(page.virt_address(), flags)
+                        .expect("Failed to remap")
+                };
+                flusher.consume(r);
+            }
             // kdebug!("consume page {:?}", page.virt_address());
-            flusher.consume(r);
             // kdebug!("remap page {:?} done", page.virt_address());
         }
         self.flags = flags;
@@ -1426,8 +1573,10 @@ impl UserStack {
         let actual_stack_bottom = stack_bottom - guard_size;
 
         let mut prot_flags = ProtFlags::PROT_READ | ProtFlags::PROT_WRITE;
-        let map_flags =
-            MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS | MapFlags::MAP_FIXED_NOREPLACE;
+        let map_flags = MapFlags::MAP_PRIVATE
+            | MapFlags::MAP_ANONYMOUS
+            | MapFlags::MAP_FIXED_NOREPLACE
+            | MapFlags::MAP_GROWSDOWN;
         // kdebug!(
         //     "map anonymous stack: {:?} {}",
         //     actual_stack_bottom,
@@ -1439,6 +1588,7 @@ impl UserStack {
             prot_flags,
             map_flags,
             false,
+            false,
         )?;
         // test_buddy();
         // 设置保护页只读
@@ -1479,7 +1629,7 @@ impl UserStack {
         mut bytes: usize,
     ) -> Result<(), SystemError> {
         let prot_flags = ProtFlags::PROT_READ | ProtFlags::PROT_WRITE | ProtFlags::PROT_EXEC;
-        let map_flags = MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS;
+        let map_flags = MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS | MapFlags::MAP_GROWSDOWN;
 
         bytes = page_align_up(bytes);
         self.mapped_size += bytes;
@@ -1490,6 +1640,7 @@ impl UserStack {
             prot_flags,
             map_flags,
             false,
+            false,
         )?;
 
         return Ok(());
@@ -1509,7 +1660,7 @@ impl UserStack {
     #[allow(dead_code)]
     pub fn extend(
         &mut self,
-        vm: &mut RwLockWriteGuard<InnerAddressSpace>,
+        vm: &mut InnerAddressSpace,
         mut bytes: usize,
     ) -> Result<(), SystemError> {
         let prot_flags = ProtFlags::PROT_READ | ProtFlags::PROT_WRITE | ProtFlags::PROT_EXEC;
@@ -1524,6 +1675,7 @@ impl UserStack {
             prot_flags,
             map_flags,
             false,
+            false,
         )?;
 
         return Ok(());

+ 8 - 3
kernel/src/syscall/mod.rs

@@ -855,10 +855,15 @@ impl Syscall {
             }
 
             SYS_MADVISE => {
-                // 这个太吵了,总是打印,先注释掉
-                // kwarn!("SYS_MADVISE has not yet been implemented");
-                Ok(0)
+                let addr = args[0];
+                let len = page_align_up(args[1]);
+                if addr & (MMArch::PAGE_SIZE - 1) != 0 {
+                    Err(SystemError::EINVAL)
+                } else {
+                    Self::madvise(VirtAddr::new(addr), len, args[2])
+                }
             }
+
             SYS_GETTID => Self::gettid().map(|tid| tid.into()),
             SYS_GETUID => Self::getuid(),
 

+ 1 - 1
user/dadk/config/nova_shell-0.1.0.dadk

@@ -6,7 +6,7 @@
     "BuildFromSource": {
       "Git": {
         "url": "https://git.mirrors.dragonos.org.cn/DragonOS-Community/NovaShell.git",
-        "revision": "c6454d3220"
+        "revision": "dcf45035c1"
       }
     }
   },