Browse Source

bugfix: 修复smp启动的时候,损坏0号核心的idle进程的内核栈的问题 (#711)

---------

Co-authored-by: longjin <[email protected]>
Co-authored-by: heyicong <[email protected]>
曾俊 11 months ago
parent
commit
3959e94df3

+ 4 - 3
kernel/src/arch/x86_64/asm/apu_boot.S

@@ -1,4 +1,5 @@
 #include "../common/asm.h"
+#include <asm/apu_boot.h>
 
 
 .align 0x1000  // 按照4k对齐
@@ -112,9 +113,9 @@ _apu_code64_vector:
 	.long	_apu_code64 - _apu_boot_base
 	.word	0x18,0	
 
-.align 0x1000 
-_apu_boot_tmp_stack_start:
-//	.org	0x400
+.align 0x1000
+ENTRY(_apu_boot_tmp_stack_start)
+	.skip	APU_BOOT_TMP_STACK_SIZE
 _apu_boot_tmp_stack_end:
 
 ENTRY(_apu_boot_end)

+ 23 - 3
kernel/src/arch/x86_64/asm/head.S

@@ -3,6 +3,7 @@
 // 2022/01/20
 
 #include "common/asm.h"
+#include <asm/apu_boot.h>
 
 // 以下是来自 multiboot2 规范的定义
 //  How many bytes from the start of the file we search for the header.
@@ -249,7 +250,6 @@ ENTRY(_start64)
     lidt IDT_POINTER(%rip)
     //lidt $IDT_POINTER
     movq GDT_POINTER(%rip), %r12
-    movq head_stack_start(%rip), %rsp
 
     // 分支,判断是否为apu
     movq	$0x1b,	%rcx		// 根据IA32_APIC_BASE.BSP[8]标志位判断处理器是否为apu
@@ -257,6 +257,9 @@ ENTRY(_start64)
 	bt	$8,	%rax
 	jnc	load_apu_cr3
 
+    // BSP处理器
+    movq head_stack_start(%rip), %rsp
+    
     // 2. 设置临时页表
     // 最高级
     mov $__PML4E, %eax
@@ -318,11 +321,11 @@ load_cr3:
 load_apu_cr3:
     // 由于内存管理模块重置了页表,因此ap核心初始化的时候,需要使用新的内核页表。
     // 这个页表的值由smp模块设置到__APU_START_CR3变量中
-
     // 加载__APU_START_CR3中的值
     movq $__APU_START_CR3, %rax
     movq 0(%rax), %rax
     movq %rax, %cr3
+    movq _apu_boot_tmp_stack_top_addr(%rip), %rsp
     jmp to_switch_seg
 
 to_switch_seg:
@@ -348,7 +351,21 @@ entry64:
     movq %rax, %gs
     movq %rax, %ss
 
-    movq head_stack_start(%rip), %rsp //rsp的地址
+    // 分支,判断是否为apu,然后设置栈指针
+    movq	$0x1b,	%rcx		// 根据IA32_APIC_BASE.BSP[8]标志位判断处理器是否为apu
+	rdmsr
+	bt	$8,	%rax
+	jnc	__set_ap_tmp_stack_start2
+__set_bsp_stack_start2:
+    movq head_stack_start(%rip), %rsp
+    jmp __set_stack_start2_ok
+__set_ap_tmp_stack_start2:
+    // 设置ap核心的临时栈
+    movq _apu_boot_tmp_stack_top_addr(%rip), %rsp
+    jmp __set_stack_start2_ok
+
+__set_stack_start2_ok:
+
     
     // 重新加载GDT和IDT,加载到高地址
     leaq GDT_Table(%rip), %r8
@@ -485,6 +502,9 @@ go_to_ignore_int:
 ENTRY(head_stack_start)
     .quad BSP_IDLE_STACK_SPACE + 32768
 
+ENTRY(_apu_boot_tmp_stack_top_addr)
+    .quad _apu_boot_tmp_stack_start + APU_BOOT_TMP_STACK_SIZE
+
 // 初始化页表
 .align 0x1000 //设置为4k对齐
 __PML4E:

+ 2 - 0
kernel/src/arch/x86_64/include/asm/apu_boot.h

@@ -0,0 +1,2 @@
+#pragma once
+#define APU_BOOT_TMP_STACK_SIZE 1024

+ 2 - 1
kernel/src/arch/x86_64/interrupt/trap.rs

@@ -300,13 +300,14 @@ unsafe extern "C" fn do_general_protection(regs: &'static TrapFrame, error_code:
         ""
     };
     kerror!(
-        "do_general_protection(13), \tError code: {:#x},\trsp: {:#x},\trip: {:#x},\t CPU: {}, \tpid: {:?}
+        "do_general_protection(13), \tError code: {:#x},\trsp: {:#x},\trip: {:#x},\t rflags: {:#x}\t CPU: {}, \tpid: {:?}
 {}{}{}
 Segment Selector Index: {:#x}\n
 ",
         error_code,
         regs.rsp,
         regs.rip,
+        regs.rflags,
         smp_get_processor_id().data(),
         ProcessManager::current_pid(),
         msg1, msg2, msg3,

+ 3 - 14
kernel/src/arch/x86_64/process/mod.rs

@@ -460,9 +460,6 @@ unsafe extern "sysv64" fn switch_to_inner(prev: *mut ArchPCBInfo, next: *mut Arc
         // mov fs, [rsi + {off_fs}]
         // mov gs, [rsi + {off_gs}]
 
-        push rbp
-        push rax
-
         mov [rdi + {off_rbp}], rbp
         mov rbp, [rsi + {off_rbp}]
 
@@ -509,17 +506,9 @@ unsafe extern "sysv64" fn switch_to_inner(prev: *mut ArchPCBInfo, next: *mut Arc
     );
 }
 
-/// 从`switch_to_inner`返回后,执行这个函数
-///
-/// 也就是说,当进程再次被调度时,会从这里开始执行
-#[inline(never)]
-unsafe extern "sysv64" fn switch_back() {
-    asm!(concat!(
-        "
-        pop rax
-        pop rbp
-        "
-    ))
+#[naked]
+unsafe extern "sysv64" fn switch_back() -> ! {
+    asm!("ret", options(noreturn));
 }
 
 pub unsafe fn arch_switch_to_user(path: String, argv: Vec<String>, envp: Vec<String>) -> ! {

+ 5 - 3
kernel/src/arch/x86_64/smp/mod.rs

@@ -46,11 +46,12 @@ struct ApStartStackInfo {
 #[no_mangle]
 unsafe extern "C" fn smp_ap_start() -> ! {
     CurrentIrqArch::interrupt_disable();
+
     let vaddr = if let Some(t) = smp_cpu_manager()
         .cpuhp_state(smp_get_processor_id())
         .thread()
     {
-        t.kernel_stack().stack_max_address().data() - 16
+        t.kernel_stack_force_ref().stack_max_address().data() - 16
     } else {
         // 没有设置ap核心的栈,那么就进入死循环。
         loop {
@@ -214,15 +215,16 @@ impl SMPArch for X86_64SMPArch {
     }
 
     fn start_cpu(cpu_id: ProcessorId, _cpu_hpstate: &CpuHpCpuState) -> Result<(), SystemError> {
-        kdebug!("start_cpu: cpu_id: {:#x}\n", cpu_id.data());
-
         Self::copy_smp_start_code();
 
+        fence(Ordering::SeqCst);
         ipi_send_smp_init();
         fence(Ordering::SeqCst);
         ipi_send_smp_startup(cpu_id)?;
+
         fence(Ordering::SeqCst);
         ipi_send_smp_startup(cpu_id)?;
+
         fence(Ordering::SeqCst);
 
         return Ok(());

+ 1 - 2
kernel/src/init/initial_kthread.rs

@@ -33,7 +33,6 @@ fn kernel_init() -> Result<(), SystemError> {
 
     // 由于目前加锁,速度过慢,所以先不开启双缓冲
     // scm_enable_double_buffer().expect("Failed to enable double buffer");
-    stdio_init().expect("Failed to initialize stdio");
 
     ahci_init().expect("Failed to initialize AHCI");
 
@@ -55,7 +54,7 @@ fn kenrel_init_freeable() -> Result<(), SystemError> {
     do_initcalls().unwrap_or_else(|err| {
         panic!("Failed to initialize subsystems: {:?}", err);
     });
-
+    stdio_init().expect("Failed to initialize stdio");
     smp_init();
 
     return Ok(());

+ 4 - 2
kernel/src/ipc/pipe.rs

@@ -158,11 +158,13 @@ impl IndexNode for LockedPipeInode {
         _offset: usize,
         len: usize,
         buf: &mut [u8],
-        data: SpinLockGuard<FilePrivateData>,
+        data_guard: SpinLockGuard<FilePrivateData>,
     ) -> Result<usize, SystemError> {
+        let data = data_guard.clone();
+        drop(data_guard);
         // 获取mode
         let mode: FileMode;
-        if let FilePrivateData::Pipefs(pdata) = &*data {
+        if let FilePrivateData::Pipefs(pdata) = &data {
             mode = pdata.mode;
         } else {
             return Err(SystemError::EBADF);

+ 3 - 4
kernel/src/libs/lib_ui/textui.rs

@@ -337,6 +337,7 @@ impl TextuiBuf<'_> {
             return self.guard.as_mut().unwrap().as_mut();
         }
     }
+
     pub fn put_color_in_pixel(&mut self, color: u32, index: usize) {
         let index = index as isize;
         match self.bit_depth {
@@ -363,7 +364,7 @@ impl TextuiBuf<'_> {
                 };
             }
             _ => {
-                panic!("不支持的位深度!")
+                panic!("bidepth unsupported!")
             }
         }
     }
@@ -373,6 +374,7 @@ impl TextuiBuf<'_> {
     pub fn get_index_by_x_y(x: usize, y: usize) -> usize {
         textui_framework().metadata.read().buf_info().width() as usize * y + x
     }
+
     pub fn get_start_index_by_lineid_lineindex(lineid: LineId, lineindex: LineIndex) -> usize {
         //   x 左上角列像素点位置
         //   y 左上角行像素点位置
@@ -622,7 +624,6 @@ impl TextuiWindow {
     /// - vline_id 要刷新的虚拟行号
     /// - start 起始字符号
     /// - count 要刷新的字符数量
-
     fn textui_refresh_characters(
         &mut self,
         vline_id: LineId,
@@ -708,7 +709,6 @@ impl TextuiWindow {
     /// ## 参数
     /// - window 窗口结构体
     /// - vline_id 虚拟行号
-
     fn textui_new_line(&mut self) -> Result<i32, SystemError> {
         // todo: 支持在两个虚拟行之间插入一个新行
         let actual_line_sum = textui_framework().actual_line.load(Ordering::SeqCst);
@@ -753,7 +753,6 @@ impl TextuiWindow {
     /// ## 参数
     /// - window
     /// - character
-
     fn true_textui_putchar_window(
         &mut self,
         character: char,

+ 5 - 0
kernel/src/libs/rwlock.rs

@@ -371,6 +371,11 @@ impl<T> RwLock<T> {
     pub unsafe fn get_mut(&mut self) -> &mut T {
         unsafe { &mut *self.data.get() }
     }
+
+    #[allow(dead_code)]
+    pub unsafe fn force_get_ref(&self) -> &T {
+        unsafe { &*self.data.get() }
+    }
 }
 
 impl<T: Default> Default for RwLock<T> {

+ 9 - 0
kernel/src/process/mod.rs

@@ -466,8 +466,13 @@ impl ProcessManager {
             .expect("next_pcb is None");
 
         // 由于进程切换前使用了SpinLockGuard::leak(),所以这里需要手动释放锁
+        fence(Ordering::SeqCst);
+
         prev_pcb.arch_info.force_unlock();
+        fence(Ordering::SeqCst);
+
         next_pcb.arch_info.force_unlock();
+        fence(Ordering::SeqCst);
     }
 
     /// 如果目标进程正在目标CPU上运行,那么就让这个cpu陷入内核态
@@ -818,6 +823,10 @@ impl ProcessControlBlock {
         return self.kernel_stack.read();
     }
 
+    pub unsafe fn kernel_stack_force_ref(&self) -> &KernelStack {
+        self.kernel_stack.force_get_ref()
+    }
+
     #[inline(always)]
     #[allow(dead_code)]
     pub fn kernel_stack_mut(&self) -> RwLockWriteGuard<KernelStack> {

+ 1 - 1
kernel/src/sched/mod.rs

@@ -809,7 +809,7 @@ pub fn scheduler_tick() {
 #[inline]
 pub fn schedule(sched_mod: SchedMode) {
     let _guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
-    assert!(ProcessManager::current_pcb().preempt_count() == 0);
+    assert_eq!(ProcessManager::current_pcb().preempt_count(), 0);
     __schedule(sched_mod);
 }
 

+ 5 - 1
kernel/src/smp/cpu/mod.rs

@@ -261,6 +261,7 @@ impl SmpCpuManager {
         // todo: 等待CPU启动完成
 
         ProcessManager::wakeup(cpu_state.thread.as_ref().unwrap())?;
+
         CurrentSMPArch::start_cpu(cpu_id, cpu_state)?;
         assert_eq!(ProcessManager::current_pcb().preempt_count(), 0);
         self.wait_for_ap_thread(cpu_state, cpu_state.bringup);
@@ -270,7 +271,10 @@ impl SmpCpuManager {
 
     fn wait_for_ap_thread(&self, cpu_state: &mut CpuHpCpuState, bringup: bool) {
         if bringup {
-            cpu_state.comp_done_up.wait_for_completion().ok();
+            cpu_state
+                .comp_done_up
+                .wait_for_completion()
+                .expect("failed to wait ap thread");
         } else {
             todo!("wait_for_ap_thread")
         }

+ 2 - 2
kernel/src/time/timer.rs

@@ -259,7 +259,7 @@ pub fn schedule_timeout(mut timeout: i64) -> Result<i64, SystemError> {
         let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
         ProcessManager::mark_sleep(true).ok();
         drop(irq_guard);
-        schedule(SchedMode::SM_PREEMPT);
+        schedule(SchedMode::SM_NONE);
         return Ok(MAX_TIMEOUT);
     } else if timeout < 0 {
         kerror!("timeout can't less than 0");
@@ -278,7 +278,7 @@ pub fn schedule_timeout(mut timeout: i64) -> Result<i64, SystemError> {
 
         drop(irq_guard);
 
-        schedule(SchedMode::SM_PREEMPT);
+        schedule(SchedMode::SM_NONE);
         let time_remaining: i64 = timeout - TIMER_JIFFIES.load(Ordering::SeqCst) as i64;
         if time_remaining >= 0 {
             // 被提前唤醒,返回剩余时间