Browse Source

Block IO Scheduler (#158)

* Block io调度器
* process_wakeup时,对cfs的进程,重设虚拟运行时间。解决由于休眠的进程,其虚拟运行时间过小,导致其他进程饥饿的问题

* 1、为AP核启动apic_timer,使其能够运行调度
2、增加kick_cpu功能,支持让某个特定核心立即运行调度器
3、wait_queue的唤醒,改为立即唤醒。
4、增加进程在核心间迁移的功能
5、CFS调度器为每个核心设置单独的IDLE进程pcb(pid均为0)
6、pcb中增加migrate_to字段
7、当具有多核时,io调度器在核心1上运行。

* io调度器文件位置修改

* 修改io的makefile

* 更新makefile中的变量名

* 修改io调度器函数名

---------

Co-authored-by: login <[email protected]>
houmkh 2 years ago
parent
commit
f6ba114bb0
38 changed files with 829 additions and 262 deletions
  1. 1 1
      kernel/src/Makefile
  2. 3 3
      kernel/src/arch/x86_64/cpu.rs
  3. 8 15
      kernel/src/common/blk_types.h
  4. 1 1
      kernel/src/common/completion.h
  5. 20 20
      kernel/src/common/kthread.h
  6. 72 113
      kernel/src/driver/disk/ahci/ahci.c
  7. 38 8
      kernel/src/driver/disk/ahci/ahci.h
  8. 52 12
      kernel/src/driver/interrupt/apic/apic.c
  9. 1 0
      kernel/src/driver/interrupt/apic/apic.h
  10. 33 9
      kernel/src/driver/interrupt/apic/apic_timer.c
  11. 2 0
      kernel/src/driver/interrupt/apic/apic_timer.h
  12. 2 0
      kernel/src/exception/irq.h
  13. 2 4
      kernel/src/filesystem/fat32/fat32.c
  14. 6 2
      kernel/src/include/bindings/wrapper.h
  15. 17 0
      kernel/src/io/Makefile
  16. 17 0
      kernel/src/io/block/Makefile
  17. 15 0
      kernel/src/io/block/block_io_scheduler.c
  18. 9 0
      kernel/src/io/block/block_io_scheduler.h
  19. 304 0
      kernel/src/io/block/block_io_scheduler.rs
  20. 1 0
      kernel/src/io/block/mod.rs
  21. 1 0
      kernel/src/io/mod.rs
  22. 1 0
      kernel/src/lib.rs
  23. 1 1
      kernel/src/libs/cpu.c
  24. 1 5
      kernel/src/libs/semaphore.c
  25. 1 2
      kernel/src/libs/spinlock.rs
  26. 1 1
      kernel/src/libs/wait_queue_head.c
  27. 11 5
      kernel/src/process/proc-types.h
  28. 13 16
      kernel/src/process/process.c
  29. 2 1
      kernel/src/process/process.rs
  30. 43 6
      kernel/src/sched/cfs.rs
  31. 11 1
      kernel/src/sched/completion.c
  32. 56 7
      kernel/src/sched/core.rs
  33. 8 12
      kernel/src/sched/rt.rs
  34. 3 1
      kernel/src/sched/sched.h
  35. 1 5
      kernel/src/smp/core.rs
  36. 65 8
      kernel/src/smp/smp.c
  37. 4 0
      kernel/src/smp/smp.h
  38. 2 3
      kernel/src/syscall/syscall.c

+ 1 - 1
kernel/src/Makefile

@@ -17,7 +17,7 @@ export ASFLAGS := --64
 LD_LIST := head.o
 
 
-kernel_subdirs := common driver process debug filesystem time arch exception mm smp sched syscall ktest libs ipc
+kernel_subdirs := common driver process debug filesystem time arch exception mm smp sched syscall ktest libs ipc io
 	
 
 

+ 3 - 3
kernel/src/arch/x86_64/cpu.rs

@@ -2,17 +2,17 @@ use core::arch::asm;
 
 /// @brief 获取当前cpu的apic id
 #[inline]
-pub fn arch_current_apic_id() -> u8 {
+pub fn current_cpu_id() -> u8 {
     let cpuid_res: u32;
     unsafe {
         asm!(
              "mov eax, 1",
              "cpuid",
-             "mov r15, ebx",
+             "mov r15, rbx",
              lateout("r15") cpuid_res
         );
     }
-    return (cpuid_res >> 24) as u8;
+    return ((cpuid_res >> 24) & 0xff) as u8;
 }
 
 /// @brief 通过pause指令,让cpu休息一会儿。降低空转功耗

+ 8 - 15
kernel/src/common/blk_types.h

@@ -40,11 +40,8 @@ struct block_device_request_packet
     uint64_t LBA_start;
     uint32_t count;
     uint64_t buffer_vaddr;
-
     uint8_t device_type; // 0: ahci
     void (*end_handler)(ul num, ul arg);
-
-    wait_queue_node_t wait_queue;
 };
 
 /**
@@ -53,7 +50,6 @@ struct block_device_request_packet
  */
 struct block_device_request_queue
 {
-    wait_queue_node_t wait_queue_list;
     struct block_device_request_packet *in_service; // 正在请求的结点
     ul request_count;
 };
@@ -64,13 +60,12 @@ struct block_device_request_queue
  */
 struct block_device
 {
-    sector_t bd_start_sector;                    // 该分区的起始扇区
-    uint64_t bd_start_LBA;                       // 起始LBA号
-    sector_t bd_sectors_num;                     // 该分区的扇区数
-    struct vfs_superblock_t *bd_superblock;      // 执行超级块的指针
-    struct blk_gendisk *bd_disk;                 // 当前分区所属的磁盘
-    struct block_device_request_queue *bd_queue; // 请求队列
-    uint16_t bd_partno;                          // 在磁盘上的分区号
+    sector_t bd_start_sector;               // 该分区的起始扇区
+    uint64_t bd_start_LBA;                  // 起始LBA号
+    sector_t bd_sectors_num;                // 该分区的扇区数
+    struct vfs_superblock_t *bd_superblock; // 执行超级块的指针
+    struct blk_gendisk *bd_disk;            // 当前分区所属的磁盘
+    uint16_t bd_partno;                     // 在磁盘上的分区号
 };
 
 // 定义blk_gendisk中的标志位
@@ -85,10 +80,8 @@ struct blk_gendisk
     char disk_name[DISK_NAME_LEN]; // 磁盘驱动器名称
     uint16_t part_cnt;             // 磁盘分区计数
     uint16_t flags;
-    struct block_device *partition;                   // 磁盘分区数组
-    const struct block_device_operation *fops;        // 磁盘操作
-    struct block_device_request_queue *request_queue; // 磁盘请求队列
+    struct block_device *partition;            // 磁盘分区数组
+    const struct block_device_operation *fops; // 磁盘操作
     void *private_data;
-
     mutex_t open_mutex; // open()/close()操作的互斥锁
 };

+ 1 - 1
kernel/src/common/completion.h

@@ -30,7 +30,7 @@ long wait_for_completion_interruptible_timeout(struct completion *x, long timeou
 void wait_for_multicompletion(struct completion x[], int n);
 bool try_wait_for_completion(struct completion *x);
 bool completion_done(struct completion *x);
-
+struct completion *completion_alloc();
 /**
  * 测试函数声明 (测试代码辅助函数)
  */

+ 20 - 20
kernel/src/common/kthread.h

@@ -1,8 +1,8 @@
 #pragma once
 
+#include <common/err.h>
 #include <common/numa.h>
 #include <process/proc-types.h>
-#include <common/err.h>
 #include <process/process.h>
 
 /**
@@ -21,9 +21,7 @@ struct kthread_info_t
     char *full_name; // 内核线程的名称
 };
 
-struct process_control_block *kthread_create_on_node(int (*thread_fn)(void *data),
-                                                     void *data,
-                                                     int node,
+struct process_control_block *kthread_create_on_node(int (*thread_fn)(void *data), void *data, int node,
                                                      const char name_fmt[], ...);
 /**
  * @brief 在当前结点上创建一个内核线程
@@ -35,12 +33,12 @@ struct process_control_block *kthread_create_on_node(int (*thread_fn)(void *data
  *
  * 请注意,该宏会创建一个内核线程,并将其设置为停止状态
  */
-#define kthread_create(thread_fn, data, name_fmt, arg...) \
+#define kthread_create(thread_fn, data, name_fmt, arg...)                                                              \
     kthread_create_on_node(thread_fn, data, NUMA_NO_NODE, name_fmt, ##arg)
 
 /**
  * @brief 创建内核线程,并将其唤醒
- * 
+ *
  * @param thread_fn 该内核线程要执行的函数
  * @param data 传递给 thread_fn 的参数数据
  * @param name_fmt printf-style format string for the thread name
@@ -56,32 +54,34 @@ struct process_control_block *kthread_create_on_node(int (*thread_fn)(void *data
 
 /**
  * @brief 创建内核实时线程,并将其唤醒
- * 
+ *
  * @param thread_fn 该内核线程要执行的函数
  * @param data 传递给 thread_fn 的参数数据
  * @param name_fmt printf-style format string for the thread name
  * @param arg name_fmt的参数
  */
-#define kthread_run_rt(thread_fn, data, name_fmt, ...)                                                          \
-    ({                                                                                                       \
-        struct process_control_block *__kt = kthread_create(thread_fn, data, name_fmt, ##__VA_ARGS__); \
-        __kt=process_init_rt_pcb(__kt);                                                              \
-        if (!IS_ERR(__kt)){                                                                                   \
-            process_wakeup(__kt);}                                                                            \
-        __kt;                                                                                                \
+#define kthread_run_rt(thread_fn, data, name_fmt, ...)                                                                 \
+    ({                                                                                                                 \
+        struct process_control_block *__kt = kthread_create(thread_fn, data, name_fmt, ##__VA_ARGS__);                 \
+        __kt = process_init_rt_pcb(__kt);                                                                              \
+        if (!IS_ERR(__kt))                                                                                             \
+        {                                                                                                              \
+            process_wakeup(__kt);                                                                                      \
+        }                                                                                                              \
+        __kt;                                                                                                          \
     })
 
 /**
  * @brief 向kthread发送停止信号,请求其结束
- * 
+ *
  * @param pcb 内核线程的pcb
  * @return int 错误码
  */
-int kthread_stop(struct process_control_block * pcb);
+int kthread_stop(struct process_control_block *pcb);
 
 /**
  * @brief 内核线程调用该函数,检查自身的标志位,判断自己是否应该执行完任务后退出
- * 
+ *
  * @return true 内核线程应该退出
  * @return false 无需退出
  */
@@ -89,14 +89,14 @@ bool kthread_should_stop(void);
 
 /**
  * @brief 让当前内核线程退出,并返回result参数给kthread_stop()函数
- * 
+ *
  * @param result 返回值
  */
 void kthread_exit(long result);
 
 /**
  * @brief 初始化kthread机制(只应被process_init调用)
- * 
+ *
  * @return int 错误码
  */
 int kthread_mechanism_init();
@@ -119,7 +119,7 @@ struct kthread_info_t *to_kthread(struct process_control_block *pcb);
 
 /**
  * @brief 释放pcb指向的worker private
- * 
+ *
  * @param pcb 要释放的pcb
  */
 void free_kthread_struct(struct process_control_block *pcb);

+ 72 - 113
kernel/src/driver/disk/ahci/ahci.c

@@ -8,11 +8,10 @@
 #include <common/block.h>
 #include <filesystem/MBR.h>
 #include <debug/bug.h>
+#include <common/kthread.h>
 
 struct pci_device_structure_header_t *ahci_devs[MAX_AHCI_DEVICES];
 
-struct block_device_request_queue ahci_req_queue;
-
 struct blk_gendisk ahci_gendisk0 = {0}; // 暂时硬性指定一个ahci_device
 static int __first_port = -1;           // 临时用于存储 ahci控制器的第一个可用端口 的变量
 
@@ -24,10 +23,6 @@ static uint64_t ahci_port_base_phys_addr; // 端口映射的物理基地址(ah
 static void start_cmd(HBA_PORT *port);
 static void stop_cmd(HBA_PORT *port);
 static void port_rebase(HBA_PORT *port, int portno);
-static long ahci_query_disk();
-
-// Find a free command list slot
-static int ahci_find_cmdslot(HBA_PORT *port);
 
 // 计算HBA_MEM的虚拟内存地址
 #define cal_HBA_MEM_VIRT_ADDR(device_num) (AHCI_MAPPING_BASE + (ul)(((struct pci_device_structure_general_device_t *)(ahci_devs[device_num]))->BAR5 - ((((struct pci_device_structure_general_device_t *)(ahci_devs[0]))->BAR5) & PAGE_2M_MASK)))
@@ -92,7 +87,7 @@ static int ahci_init_gendisk()
     ahci_gendisk0.flags = BLK_GF_AHCI;
     ahci_gendisk0.fops = &ahci_operation;
     mutex_init(&ahci_gendisk0.open_mutex);
-    ahci_gendisk0.request_queue = &ahci_req_queue;
+
     // 为存储分区结构,分配内存空间
     ahci_gendisk0.private_data = __alloc_private_data();
     // 读取分区表
@@ -127,7 +122,7 @@ static int ahci_init_gendisk()
                 // 初始化分区结构体
                 ahci_gendisk0.partition[cnt].bd_disk = &ahci_gendisk0;
                 ahci_gendisk0.partition[cnt].bd_partno = cnt;
-                ahci_gendisk0.partition[cnt].bd_queue = &ahci_req_queue;
+                // FIXME 需要注释
                 ahci_gendisk0.partition[cnt].bd_sectors_num = ptable->DPTE[i].total_sectors;
                 ahci_gendisk0.partition[cnt].bd_start_sector = ptable->DPTE[i].starting_sector;
                 ahci_gendisk0.partition[cnt].bd_superblock = NULL; // 挂载文件系统时才会初始化superblock
@@ -175,11 +170,6 @@ void ahci_init()
     kdebug("ahci_port_base_vaddr=%#018lx", ahci_port_base_vaddr);
     ahci_probe_port(0);
 
-    // 初始化请求队列
-    ahci_req_queue.in_service = NULL;
-    wait_queue_init(&ahci_req_queue.wait_queue_list, NULL);
-    ahci_req_queue.request_count = 0;
-
     BUG_ON(ahci_init_gendisk() != 0);
     kinfo("AHCI initialized.");
 }
@@ -333,17 +323,23 @@ static void port_rebase(HBA_PORT *port, int portno)
  * @param starth high 32bits of start addr
  * @param count total sectors to read
  * @param buf buffer
+ * @param ret_slot 执行命令的插槽号(传出参数)
  * @return true done
  * @return false failed
  */
-static bool ahci_read(HBA_PORT *port, uint32_t startl, uint32_t starth, uint32_t count, uint64_t buf)
+static int ahci_read(HBA_PORT *port, uint32_t startl, uint32_t starth, uint32_t count, uint64_t buf, int8_t *ret_slot)
 {
     port->is = (uint32_t)-1; // Clear pending interrupt bits
-    int spin = 0;            // Spin lock timeout counter
+
+    int spin = 0; // Spin lock timeout counter
     int slot = ahci_find_cmdslot(port);
 
     if (slot == -1)
         return E_NOEMPTYSLOT;
+    if (ret_slot)
+    {
+        *ret_slot = slot;
+    }
 
     HBA_CMD_HEADER *cmdheader = (HBA_CMD_HEADER *)phys_2_virt(port->clb);
     cmdheader += slot;
@@ -402,43 +398,59 @@ static bool ahci_read(HBA_PORT *port, uint32_t startl, uint32_t starth, uint32_t
 
     port->ci = 1 << slot; // Issue command
 
-    current_pcb->flags |= PF_NEED_SCHED;
+    return 0;
+}
 
-    int retval = AHCI_SUCCESS;
-    // Wait for completion
-    while (1)
-    {
-        // In some longer duration reads, it may be helpful to spin on the DPS bit
-        // in the PxIS port field as well (1 << 5)
-        if ((port->ci & (1 << slot)) == 0)
-            break;
-        if (port->is & HBA_PxIS_TFES) // Task file error
-        {
-            kerror("Read disk error");
-            retval = E_TASK_FILE_ERROR;
-            break;
-        }
-    }
+/**
+ * @brief 检查请求包是否已完成
+ *
+ * @param port_num HBA PORT 编号
+ * @param ahci_ctrl_num ahci控制号
+ * @param ret_slot 执行命令的插槽号
+ * @param err 错误信息
+ */
+int ahci_check_complete(uint8_t port_num, uint8_t ahci_ctrl_num, int8_t slot, char *err)
+{
 
-    // Check again
-    if (port->is & HBA_PxIS_TFES)
+    HBA_PORT *port = ahci_get_port(port_num, ahci_ctrl_num);
+    int retval = -EBUSY;
+    if (slot == -1)
+        retval = -EINVAL;
+    // In some longer duration reads, it may be helpful to spin on the DPS bit
+    // in the PxIS port field as well (1 << 5)
+    if ((port->ci & (1 << slot)) == 0)
+        retval = 0;
+    if (port->is & HBA_PxIS_TFES) // Task file error
     {
-        kerror("Read disk error");
+        if (err != NULL)
+            kerror(*err);
         retval = E_TASK_FILE_ERROR;
     }
-    enter_syscall_int(SYS_AHCI_END_REQ, 0, 0, 0, 0, 0, 0, 0, 0);
     return retval;
 }
 
-static bool ahci_write(HBA_PORT *port, uint32_t startl, uint32_t starth, uint32_t count,
-                       uint64_t buf)
+/**
+ * @brief write data to SATA device using 48bit LBA address
+ *
+ * @param port HBA PORT
+ * @param startl low 32bits of start addr
+ * @param starth high 32bits of start addr
+ * @param count total sectors to read
+ * @param buf buffer
+ * @param ret_slot 执行命令的插槽号(传出参数)
+ * @return success 0
+ */
+static int ahci_write(HBA_PORT *port, uint32_t startl, uint32_t starth, uint32_t count,
+                      uint64_t buf, int8_t *ret_slot)
 {
-    // kdebug("ahci write");
     port->is = 0xffff; // Clear pending interrupt bits
     int slot = ahci_find_cmdslot(port);
     if (slot == -1)
         return E_NOEMPTYSLOT;
-
+    if (ret_slot)
+    {
+        *ret_slot = slot;
+    }
     HBA_CMD_HEADER *cmdheader = (HBA_CMD_HEADER *)phys_2_virt(port->clb);
 
     cmdheader += slot;
@@ -479,37 +491,13 @@ static bool ahci_write(HBA_PORT *port, uint32_t startl, uint32_t starth, uint32_
 
     cmdfis->countl = count & 0xff;
     cmdfis->counth = count >> 8;
-    //    printk("[slot]{%d}", slot);
     port->ci = 1; // Issue command
 
-    current_pcb->flags |= PF_NEED_SCHED;
-    int retval = AHCI_SUCCESS;
-
-    while (1)
-    {
-        // In some longer duration reads, it may be helpful to spin on the DPS bit
-        // in the PxIS port field as well (1 << 5)
-        if ((port->ci & (1 << slot)) == 0)
-            break;
-        if (port->is & HBA_PxIS_TFES)
-        { // Task file error
-            kerror("Write disk error");
-            retval = E_TASK_FILE_ERROR;
-            break;
-        }
-    }
-    if (port->is & HBA_PxIS_TFES)
-    {
-        kerror("Write disk error");
-        retval = E_TASK_FILE_ERROR;
-    }
-    // kdebug("ahci write retval=%d", retval);
-    enter_syscall_int(SYS_AHCI_END_REQ, 0, 0, 0, 0, 0, 0, 0, 0);
-    return retval;
+    return 0;
 }
 
 // Find a free command list slot
-static int ahci_find_cmdslot(HBA_PORT *port)
+int ahci_find_cmdslot(HBA_PORT *port)
 {
     // If not set in SACT and CI, the slot is free
     uint32_t slots = (port->sact | port->ci);
@@ -547,9 +535,7 @@ long ahci_close()
  */
 static struct ahci_request_packet_t *ahci_make_request(long cmd, uint64_t base_addr, uint64_t count, uint64_t buffer, uint8_t ahci_ctrl_num, uint8_t port_num)
 {
-    struct ahci_request_packet_t *pack = (struct ahci_request_packet_t *)kmalloc(sizeof(struct ahci_request_packet_t), 0);
-
-    wait_queue_init(&pack->blk_pak.wait_queue, current_pcb);
+    struct ahci_request_packet_t *pack = (struct ahci_request_packet_t *)kzalloc(sizeof(struct ahci_request_packet_t), 0);
     pack->blk_pak.device_type = BLK_TYPE_AHCI;
 
     // 由于ahci不需要中断即可读取磁盘,因此end handler为空
@@ -572,70 +558,30 @@ static struct ahci_request_packet_t *ahci_make_request(long cmd, uint64_t base_a
     pack->blk_pak.LBA_start = base_addr;
     pack->blk_pak.count = count;
     pack->blk_pak.buffer_vaddr = buffer;
-
     pack->ahci_ctrl_num = ahci_ctrl_num;
     pack->port_num = port_num;
     return pack;
 }
 
-/**
- * @brief 结束磁盘请求
- *
- */
-void ahci_end_request()
-{
-    ahci_req_queue.in_service->wait_queue.pcb->state = PROC_RUNNING;
-    // ahci_req_queue.in_service->wait_queue.pcb->flags |= PF_NEED_SCHED;
-    // current_pcb->flags |= PF_NEED_SCHED;
-    kfree((uint64_t *)ahci_req_queue.in_service);
-    ahci_req_queue.in_service = NULL;
-
-    // 进行下一轮的磁盘请求 (由于未实现单独的io调度器,这里会造成长时间的io等待)
-    if (ahci_req_queue.request_count > 0)
-        ahci_query_disk();
-}
-
-static long ahci_query_disk()
+long ahci_query_disk(struct ahci_request_packet_t *pack, int8_t *ret_slot)
 {
-    wait_queue_node_t *wait_queue_tmp = container_of(list_next(&ahci_req_queue.wait_queue_list.wait_list), wait_queue_node_t, wait_list);
-    struct ahci_request_packet_t *pack = (struct ahci_request_packet_t *)container_of(wait_queue_tmp, struct block_device_request_packet, wait_queue);
 
-    ahci_req_queue.in_service = (struct block_device_request_packet *)pack;
-    list_del(&(ahci_req_queue.in_service->wait_queue.wait_list));
-    --ahci_req_queue.request_count;
-    // kdebug("ahci_query_disk");
     long ret_val = 0;
-
     switch (pack->blk_pak.cmd)
     {
     case AHCI_CMD_READ_DMA_EXT:
-        ret_val = ahci_read(&(ahci_devices[pack->ahci_ctrl_num].hba_mem->ports[pack->port_num]), pack->blk_pak.LBA_start & 0xFFFFFFFF, ((pack->blk_pak.LBA_start) >> 32) & 0xFFFFFFFF, pack->blk_pak.count, pack->blk_pak.buffer_vaddr);
+        ret_val = ahci_read(&(ahci_devices[pack->ahci_ctrl_num].hba_mem->ports[pack->port_num]), pack->blk_pak.LBA_start & 0xFFFFFFFF, ((pack->blk_pak.LBA_start) >> 32) & 0xFFFFFFFF, pack->blk_pak.count, pack->blk_pak.buffer_vaddr, ret_slot);
         break;
     case AHCI_CMD_WRITE_DMA_EXT:
-        ret_val = ahci_write(&(ahci_devices[pack->ahci_ctrl_num].hba_mem->ports[pack->port_num]), pack->blk_pak.LBA_start & 0xFFFFFFFF, ((pack->blk_pak.LBA_start) >> 32) & 0xFFFFFFFF, pack->blk_pak.count, pack->blk_pak.buffer_vaddr);
+        ret_val = ahci_write(&(ahci_devices[pack->ahci_ctrl_num].hba_mem->ports[pack->port_num]), pack->blk_pak.LBA_start & 0xFFFFFFFF, ((pack->blk_pak.LBA_start) >> 32) & 0xFFFFFFFF, pack->blk_pak.count, pack->blk_pak.buffer_vaddr, ret_slot);
         break;
     default:
         kerror("Unsupport ahci command: %#05lx", pack->blk_pak.cmd);
         ret_val = E_UNSUPPORTED_CMD;
         break;
     }
-    // kdebug("ahci_query_disk: retval=%d", ret_val);
-    // ahci_end_request();
-    return ret_val;
-}
-
-/**
- * @brief 将请求包提交到io队列
- *
- * @param pack
- */
-static void ahci_submit(struct ahci_request_packet_t *pack)
-{
-    list_append(&(ahci_req_queue.wait_queue_list.wait_list), &(pack->blk_pak.wait_queue.wait_list));
-    ++ahci_req_queue.request_count;
 
-    if (ahci_req_queue.in_service == NULL) // 当前没有正在请求的io包,立即执行磁盘请求
-        ahci_query_disk();
+    return ret_val;
 }
 
 /**
@@ -656,10 +602,13 @@ static long ahci_transfer(struct blk_gendisk *gd, long cmd, uint64_t base_addr,
     if (cmd == AHCI_CMD_READ_DMA_EXT || cmd == AHCI_CMD_WRITE_DMA_EXT)
     {
         pack = ahci_make_request(cmd, base_addr, count, buf, pdata->ahci_ctrl_num, pdata->ahci_port_num);
-        ahci_submit(pack);
+        ahci_push_request(pack);
     }
     else
+    {
+        kdebug("ahci_transfer: E_UNSUPPORTED_CMD");
         return E_UNSUPPORTED_CMD;
+    }
 
     return AHCI_SUCCESS;
 }
@@ -675,3 +624,13 @@ static long ahci_ioctl(long cmd, long arg)
 {
     return 0;
 }
+
+/**
+ * @brief 根据端口号获取端口结构体
+ * @param port_num 端口号
+ * @param ahci_ctrl_num 控制号
+ */
+HBA_PORT *ahci_get_port(uint8_t port_num, uint8_t ahci_ctrl_num)
+{
+    return &(ahci_devices[ahci_ctrl_num].hba_mem->ports[port_num]);
+}

+ 38 - 8
kernel/src/driver/disk/ahci/ahci.h

@@ -380,10 +380,10 @@ static void ahci_probe_port(const uint32_t device_num);
  * @param starth high 32bits of start addr
  * @param count total sectors to read
  * @param buf buffer
- * @return true done
- * @return false failed
+ * @param ret_slot 执行命令的插槽号(传出参数)
+ * @return success 0
  */
-static bool ahci_read(HBA_PORT *port, uint32_t startl, uint32_t starth, uint32_t count, uint64_t buf);
+static int ahci_read(HBA_PORT *port, uint32_t startl, uint32_t starth, uint32_t count, uint64_t buf, int8_t *ret_slot);
 
 /**
  * @brief write data to SATA device using 48bit LBA address
@@ -393,10 +393,40 @@ static bool ahci_read(HBA_PORT *port, uint32_t startl, uint32_t starth, uint32_t
  * @param starth high 32bits of start addr
  * @param count total sectors to read
  * @param buf buffer
- * @return true done
- * @return false failed
+ * @param ret_slot 执行命令的插槽号(传出参数)
+ * @return success 0
  */
-static bool ahci_write(HBA_PORT *port, uint32_t startl, uint32_t starth, uint32_t count,
-                       uint64_t buf);
+static int ahci_write(HBA_PORT *port, uint32_t startl, uint32_t starth, uint32_t count,
+                      uint64_t buf, int8_t *ret_slot);
 
-void ahci_end_request();
+void ahci_end_request();
+
+/**
+ * @brief 检查请求包是否已完成
+ *
+ * @param port_num HBA PORT 编号
+ * @param ahci_ctrl_num ahci控制号
+ * @param ret_slot 执行命令的插槽号
+ * @param err 错误信息
+ */
+int ahci_check_complete(uint8_t port_num, uint8_t ahci_ctrl_num, int8_t slot, char *err);
+
+/**
+ * @brief 根据端口号获取端口结构体
+ * @param port_num 端口号
+ * @param ahci_ctrl_num 控制号
+ */
+HBA_PORT *ahci_get_port(uint8_t port_num, uint8_t ahci_ctrl_num);
+
+/**
+ * @brief Find a free command list slot
+ * @param port
+ */
+int ahci_find_cmdslot(HBA_PORT *port);
+
+/**
+ * @brief 读取磁盘信息
+ * @param pack io请求包
+ * @param ret_slot 执行命令的插槽号(传出参数)
+ */
+long ahci_query_disk(struct ahci_request_packet_t *pack, int8_t *ret_slot);

+ 52 - 12
kernel/src/driver/interrupt/apic/apic.c

@@ -1,11 +1,11 @@
 #include "apic.h"
 #include "apic_timer.h"
-#include <common/kprint.h>
-#include <common/printk.h>
 #include <common/cpu.h>
 #include <common/glib.h>
-#include <exception/gate.h>
+#include <common/kprint.h>
+#include <common/printk.h>
 #include <driver/acpi/acpi.h>
+#include <exception/gate.h>
 
 #include <exception/softirq.h>
 #include <process/process.h>
@@ -62,7 +62,8 @@ void apic_io_apic_init()
     // kdebug("MADT->length= %d bytes", madt->header.Length);
     //  寻找io apic的ICS
     void *ent = (void *)(madt_addr) + sizeof(struct acpi_Multiple_APIC_Description_Table_t);
-    struct apic_Interrupt_Controller_Structure_header_t *header = (struct apic_Interrupt_Controller_Structure_header_t *)ent;
+    struct apic_Interrupt_Controller_Structure_header_t *header =
+        (struct apic_Interrupt_Controller_Structure_header_t *)ent;
     while (header->length > 2)
     {
         header = (struct apic_Interrupt_Controller_Structure_header_t *)ent;
@@ -85,7 +86,8 @@ void apic_io_apic_init()
 
     // kdebug("(ul)apic_ioapic_map.virtual_index_addr=%#018lx", (ul)apic_ioapic_map.virtual_index_addr);
     // 填写页表,完成地址映射
-    mm_map_phys_addr((ul)apic_ioapic_map.virtual_index_addr, apic_ioapic_map.addr_phys, PAGE_2M_SIZE, PAGE_KERNEL_PAGE | PAGE_PWT | PAGE_PCD, false);
+    mm_map_phys_addr((ul)apic_ioapic_map.virtual_index_addr, apic_ioapic_map.addr_phys, PAGE_2M_SIZE,
+                     PAGE_KERNEL_PAGE | PAGE_PWT | PAGE_PCD, false);
 
     // 设置IO APIC ID 为0x0f000000
     *apic_ioapic_map.virtual_index_addr = 0x00;
@@ -99,7 +101,8 @@ void apic_io_apic_init()
     // 获取IO APIC Version
     *apic_ioapic_map.virtual_index_addr = 0x01;
     io_mfence();
-    kdebug("IO APIC Version=%d, Max Redirection Entries=%d", *apic_ioapic_map.virtual_data_addr & 0xff, (((*apic_ioapic_map.virtual_data_addr) >> 16) & 0xff) + 1);
+    kdebug("IO APIC Version=%d, Max Redirection Entries=%d", *apic_ioapic_map.virtual_data_addr & 0xff,
+           (((*apic_ioapic_map.virtual_data_addr) >> 16) & 0xff) + 1);
 
     // 初始化RTE表项,将所有RTE表项屏蔽
     for (int i = 0x10; i < 0x40; i += 2)
@@ -171,7 +174,8 @@ static void __local_apic_xapic_init()
     local_apic_max_LVT_entries = ((qword >> 16) & 0xff) + 1;
     local_apic_version = qword & 0xff;
 
-    kdebug("local APIC Version:%#010x,Max LVT Entry:%#010x,SVR(Suppress EOI Broadcast):%#04x\t", local_apic_version, local_apic_max_LVT_entries, (qword >> 24) & 0x1);
+    kdebug("local APIC Version:%#010x,Max LVT Entry:%#010x,SVR(Suppress EOI Broadcast):%#04x\t", local_apic_version,
+           local_apic_max_LVT_entries, (qword >> 24) & 0x1);
 
     if ((qword & 0xff) < 0x10)
     {
@@ -189,7 +193,8 @@ static void __local_apic_xapic_init()
 
     *(uint *)(APIC_LOCAL_APIC_VIRT_BASE_ADDR + LOCAL_APIC_OFFSET_Local_APIC_LVT_THERMAL) = APIC_LVT_INT_MASKED;
     io_mfence();
-    *(uint *)(APIC_LOCAL_APIC_VIRT_BASE_ADDR + LOCAL_APIC_OFFSET_Local_APIC_LVT_PERFORMANCE_MONITOR) = APIC_LVT_INT_MASKED;
+    *(uint *)(APIC_LOCAL_APIC_VIRT_BASE_ADDR + LOCAL_APIC_OFFSET_Local_APIC_LVT_PERFORMANCE_MONITOR) =
+        APIC_LVT_INT_MASKED;
     io_mfence();
     *(uint *)(APIC_LOCAL_APIC_VIRT_BASE_ADDR + LOCAL_APIC_OFFSET_Local_APIC_LVT_LINT0) = APIC_LVT_INT_MASKED;
     io_mfence();
@@ -232,7 +237,8 @@ static void __local_apic_x2apic_init()
     local_apic_max_LVT_entries = ((eax >> 16) & 0xff) + 1;
     local_apic_version = eax & 0xff;
 
-    kdebug("local APIC Version:%#010x,Max LVT Entry:%#010x,SVR(Suppress EOI Broadcast):%#04x\t", local_apic_version, local_apic_max_LVT_entries, (eax >> 24) & 0x1);
+    kdebug("local APIC Version:%#010x,Max LVT Entry:%#010x,SVR(Suppress EOI Broadcast):%#04x\t", local_apic_version,
+           local_apic_max_LVT_entries, (eax >> 24) & 0x1);
 
     if ((eax & 0xff) < 0x10)
         kdebug("82489DX discrete APIC");
@@ -269,7 +275,8 @@ void apic_local_apic_init()
     uint64_t ia32_apic_base = rdmsr(0x1b);
     // kdebug("apic base=%#018lx", (ia32_apic_base & 0x1FFFFFFFFFF000));
     // 映射Local APIC 寄存器地址
-    mm_map_phys_addr(APIC_LOCAL_APIC_VIRT_BASE_ADDR, (ia32_apic_base & 0x1FFFFFFFFFFFFF), PAGE_2M_SIZE, PAGE_KERNEL_PAGE | PAGE_PWT | PAGE_PCD, false);
+    mm_map_phys_addr(APIC_LOCAL_APIC_VIRT_BASE_ADDR, (ia32_apic_base & 0x1FFFFFFFFFFFFF), PAGE_2M_SIZE,
+                     PAGE_KERNEL_PAGE | PAGE_PWT | PAGE_PCD, false);
     uint a, b, c, d;
 
     cpu_cpuid(1, 0, &a, &b, &c, &d);
@@ -588,7 +595,8 @@ void apic_local_apic_edge_ack(ul irq_num)
 uint apic_get_ics(const uint type, ul ret_vaddr[], uint *total)
 {
     void *ent = (void *)(madt) + sizeof(struct acpi_Multiple_APIC_Description_Table_t);
-    struct apic_Interrupt_Controller_Structure_header_t *header = (struct apic_Interrupt_Controller_Structure_header_t *)ent;
+    struct apic_Interrupt_Controller_Structure_header_t *header =
+        (struct apic_Interrupt_Controller_Structure_header_t *)ent;
     bool flag = false;
 
     uint cnt = 0;
@@ -626,7 +634,8 @@ uint apic_get_ics(const uint type, ul ret_vaddr[], uint *total)
  * @param dest_apicID 目标apicID
  */
 void apic_make_rte_entry(struct apic_IO_APIC_RTE_entry *entry, uint8_t vector, uint8_t deliver_mode, uint8_t dest_mode,
-                         uint8_t deliver_status, uint8_t polarity, uint8_t irr, uint8_t trigger, uint8_t mask, uint8_t dest_apicID)
+                         uint8_t deliver_status, uint8_t polarity, uint8_t irr, uint8_t trigger, uint8_t mask,
+                         uint8_t dest_apicID)
 {
 
     entry->vector = vector;
@@ -652,4 +661,35 @@ void apic_make_rte_entry(struct apic_IO_APIC_RTE_entry *entry, uint8_t vector, u
         entry->destination.logical.reserved1 = 0;
     }
 }
+
+/**
+ * @brief 获取当前处理器的local apic id
+ * 
+ * @return uint32_t 
+ */
+uint32_t apic_get_local_apic_id()
+{
+    // 获取Local APIC的基础信息 (参见英特尔开发手册Vol3A 10-39)
+    //                          Table 10-6. Local APIC Register Address Map Supported by x2APIC
+
+    if (flag_support_x2apic)
+    {
+        // 获取 Local APIC ID
+        // 0x802处是x2APIC ID 位宽32bits 的 Local APIC ID register
+        uint32_t x = 0;
+        __asm__ __volatile__("movq $0x802, %%rcx    \n\t"
+                             "rdmsr  \n\t"
+                             : "=a"(x)::"memory");
+        return x;
+    }
+    else
+    {
+        // kdebug("get Local APIC ID: edx=%#010x, eax=%#010x", edx, eax);
+        // kdebug("local_apic_id=%#018lx", );
+
+        uint32_t x = *(uint32_t *)(APIC_LOCAL_APIC_VIRT_BASE_ADDR + LOCAL_APIC_OFFSET_Local_APIC_ID);
+        x = ((x >> 24) & 0xff);
+        return x;
+    }
+}
 #pragma GCC pop_options

+ 1 - 0
kernel/src/driver/interrupt/apic/apic.h

@@ -330,4 +330,5 @@ void apic_local_apic_edge_ack(ul irq_num); // local apic边沿触发 应答
 void apic_make_rte_entry(struct apic_IO_APIC_RTE_entry *entry, uint8_t vector, uint8_t deliver_mode, uint8_t dest_mode,
                          uint8_t deliver_status, uint8_t polarity, uint8_t irr, uint8_t trigger, uint8_t mask, uint8_t dest_apicID);
 
+uint32_t apic_get_local_apic_id();
 #pragma GCC pop_options

+ 33 - 9
kernel/src/driver/interrupt/apic/apic_timer.c

@@ -1,12 +1,29 @@
 #include "apic_timer.h"
+#include <common/kprint.h>
 #include <exception/irq.h>
 #include <process/process.h>
-#include <common/kprint.h>
 #include <sched/sched.h>
 
 // #pragma GCC push_options
 // #pragma GCC optimize("O0")
 uint64_t apic_timer_ticks_result = 0;
+static spinlock_t apic_timer_init_lock = {1};
+// bsp 是否已经完成apic时钟初始化
+static bool bsp_initialized = false;
+
+/**
+ * @brief 初始化AP核的apic时钟
+ *
+ */
+void apic_timer_ap_core_init()
+{
+    while (!bsp_initialized)
+    {
+        pause();
+    }
+
+    apic_timer_init();
+}
 
 void apic_timer_enable(uint64_t irq_num)
 {
@@ -55,13 +72,12 @@ void apic_timer_uninstall(ul irq_num)
     io_mfence();
 }
 
-hardware_intr_controller apic_timer_intr_controller =
-    {
-        .enable = apic_timer_enable,
-        .disable = apic_timer_disable,
-        .install = apic_timer_install,
-        .uninstall = apic_timer_uninstall,
-        .ack = apic_local_apic_edge_ack,
+hardware_intr_controller apic_timer_intr_controller = {
+    .enable = apic_timer_enable,
+    .disable = apic_timer_disable,
+    .install = apic_timer_install,
+    .uninstall = apic_timer_uninstall,
+    .ack = apic_local_apic_edge_ack,
 };
 
 /**
@@ -84,15 +100,23 @@ void apic_timer_handler(uint64_t number, uint64_t param, struct pt_regs *regs)
  */
 void apic_timer_init()
 {
+
     if (apic_timer_ticks_result == 0)
     {
         kBUG("APIC timer ticks in 5ms is equal to ZERO!");
         while (1)
             hlt();
     }
+    spin_lock(&apic_timer_init_lock);
     kinfo("Initializing apic timer for cpu %d", proc_current_cpu_id);
     io_mfence();
-    irq_register(APIC_TIMER_IRQ_NUM, &apic_timer_ticks_result, &apic_timer_handler, 0, &apic_timer_intr_controller, "apic timer");
+    irq_register(APIC_TIMER_IRQ_NUM, &apic_timer_ticks_result, &apic_timer_handler, 0, &apic_timer_intr_controller,
+                 "apic timer");
     io_mfence();
+    if (proc_current_cpu_id == 0)
+    {
+        bsp_initialized = true;
+    }
+    spin_unlock(&apic_timer_init_lock);
     // kinfo("Successfully initialized apic timer for cpu %d", proc_current_cpu_id);
 }

+ 2 - 0
kernel/src/driver/interrupt/apic/apic_timer.h

@@ -105,4 +105,6 @@ static __always_inline uint32_t apic_timer_get_current()
  */
 void apic_timer_init();
 
+void apic_timer_ap_core_init();
+
 #pragma GCC pop_options

+ 2 - 0
kernel/src/exception/irq.h

@@ -102,6 +102,8 @@ extern void (*local_apic_interrupt_table[LOCAL_APIC_IRQ_NUM])(void);
 
 200 ~   255	MP IPI
 
+	200 kick cpu 功能(使得某个核心立即运行进程调度)
+
 */
 
 typedef struct hardware_intr_type

+ 2 - 4
kernel/src/filesystem/fat32/fat32.c

@@ -372,10 +372,11 @@ find_lookup_success:; // 找到目标dentry
  */
 struct vfs_superblock_t *fat32_read_superblock(struct block_device *blk)
 {
+    // BUG
     // 读取文件系统的boot扇区
     uint8_t buf[512] = {0};
-    blk->bd_disk->fops->transfer(blk->bd_disk, AHCI_CMD_READ_DMA_EXT, blk->bd_start_LBA, 1, (uint64_t)&buf);
 
+    blk->bd_disk->fops->transfer(blk->bd_disk, AHCI_CMD_READ_DMA_EXT, blk->bd_start_LBA, 1, (uint64_t)&buf);
     // 分配超级块的空间
     struct vfs_superblock_t *sb_ptr = (struct vfs_superblock_t *)kzalloc(sizeof(struct vfs_superblock_t), 0);
     blk->bd_superblock = sb_ptr;
@@ -445,7 +446,6 @@ struct vfs_superblock_t *fat32_read_superblock(struct block_device *blk)
     finode->create_date = 0;
     finode->write_date = 0;
     finode->write_time;
-
     return sb_ptr;
 }
 
@@ -1411,8 +1411,6 @@ void fat32_init()
 
     // 在VFS中注册fat32文件系统
     vfs_register_filesystem(&fat32_fs_type);
-
     // 挂载根文件系统
     fat32_register_partition(ahci_gendisk0.partition + 0, 0);
-    kinfo("FAT32 initialized.");
 }

+ 6 - 2
kernel/src/include/bindings/wrapper.h

@@ -10,6 +10,8 @@
  */
 #pragma once
 
+#include <common/blk_types.h>
+#include <common/completion.h>
 #include <common/crc16.h>
 #include <common/crc32.h>
 #include <common/crc64.h>
@@ -18,16 +20,18 @@
 #include <common/gfp.h>
 #include <common/glib.h>
 #include <common/kfifo.h>
+#include <common/kthread.h>
 #include <common/list.h>
 #include <common/lz4.h>
 #include <common/printk.h>
 #include <common/spinlock.h>
+#include <common/time.h>
 #include <common/unistd.h>
-#include <common/glib.h>
+#include <driver/disk/ahci/ahci.h>
 #include <include/DragonOS/refcount.h>
 #include <include/DragonOS/signal.h>
 #include <mm/mm.h>
 #include <mm/slab.h>
 #include <process/process.h>
 #include <sched/sched.h>
-#include <time/sleep.h>
+#include <time/sleep.h>

+ 17 - 0
kernel/src/io/Makefile

@@ -0,0 +1,17 @@
+
+CFLAGS += -I .
+
+kernel_io_subdirs:=block
+
+ECHO:
+	@echo "$@"
+
+$(kernel_io_subdirs): ECHO
+
+	$(MAKE) -C $@ all CFLAGS="$(CFLAGS)" ASFLAGS="$(ASFLAGS)" PIC="$(PIC)"
+
+all: $(kernel_io_subdirs)
+
+
+clean:
+	echo "Done."

+ 17 - 0
kernel/src/io/block/Makefile

@@ -0,0 +1,17 @@
+
+CFLAGS += -I .
+
+
+kernel_io_block_objs:= $(shell find ./*.c )
+
+ECHO:
+	@echo "$@"
+
+$(kernel_io_block_objs): ECHO
+	$(CC) $(CFLAGS) -c $@ -o [email protected]
+
+all: $(kernel_io_block_objs)
+
+
+clean:
+	echo "Done."

+ 15 - 0
kernel/src/io/block/block_io_scheduler.c

@@ -0,0 +1,15 @@
+#include <common/kthread.h>
+#include <io/block/block_io_scheduler.h>
+#include <sched/sched.h>
+#include <smp/smp.h>
+/**
+ * @brief 初始化io调度器
+ */
+void block_io_scheduler_init()
+{
+    // 使用rust中的函数进行初始化
+    block_io_scheduler_init_rust();
+    struct process_control_block *pcb = kthread_run(&block_io_scheduler_address_requests, NULL, "block_io_scheduler", NULL);
+    if (smp_get_total_cpu() > 1)
+        sched_migrate_process(pcb, 1);
+}

+ 9 - 0
kernel/src/io/block/block_io_scheduler.h

@@ -0,0 +1,9 @@
+#pragma once
+
+extern void block_io_scheduler_address_requests();
+extern void block_io_scheduler_init_rust();
+
+/**
+ * @brief 初始化io调度器
+ */
+void block_io_scheduler_init();

+ 304 - 0
kernel/src/io/block/block_io_scheduler.rs

@@ -0,0 +1,304 @@
+use core::{ptr::null_mut, sync::atomic::compiler_fence};
+
+use alloc::{boxed::Box, vec::Vec};
+
+use crate::{
+    arch::mm::barrier::mfence,
+    include::bindings::bindings::{
+        ahci_check_complete, ahci_query_disk, ahci_request_packet_t, block_device_request_packet,
+        complete, completion, completion_alloc, wait_for_completion,
+    },
+    kBUG,
+    libs::spinlock::RawSpinlock,
+};
+#[derive(Debug)]
+///  achi请求包
+pub struct AhciRequestPacket {
+    pub ahci_ctrl_num: u8,
+    pub port_num: u8,
+    pub slot: i8,
+}
+
+impl AhciRequestPacket {
+    pub fn new() -> Self {
+        return AhciRequestPacket {
+            ..Default::default()
+        };
+    }
+}
+
+impl Default for AhciRequestPacket {
+    fn default() -> Self {
+        AhciRequestPacket {
+            ahci_ctrl_num: 0,
+            port_num: Default::default(),
+            slot: -1,
+        }
+    }
+}
+#[derive(Debug)]
+
+/// io请求包
+pub struct BlockDeviceRequestPacket<T> {
+    pub cmd: u8,
+    pub lba_start: u64,
+    pub count: u32,
+    pub buffer_vaddr: u64,
+    pub device_type: u8, // 0: ahci
+    pub end_handler: ::core::option::Option<
+        unsafe extern "C" fn(num: ::core::ffi::c_ulong, arg: ::core::ffi::c_ulong),
+    >,
+    pub private_ahci_request_packet: T,
+    pub status: *mut completion,
+}
+impl<AhciRequestPacket> BlockDeviceRequestPacket<AhciRequestPacket> {
+    pub fn new(
+        ahci_request_packet: AhciRequestPacket,
+    ) -> BlockDeviceRequestPacket<AhciRequestPacket> {
+        let cmpl: *mut completion = unsafe { completion_alloc() };
+
+        return BlockDeviceRequestPacket {
+            cmd: Default::default(),
+            lba_start: Default::default(),
+            count: Default::default(),
+            buffer_vaddr: Default::default(),
+            device_type: Default::default(),
+            end_handler: Default::default(),
+            private_ahci_request_packet: ahci_request_packet,
+            status: cmpl,
+        };
+    }
+}
+
+struct RequestQueue {
+    lock: RawSpinlock,
+    waiting_queue: Vec<BlockDeviceRequestPacket<AhciRequestPacket>>,
+    processing_queue: Vec<BlockDeviceRequestPacket<AhciRequestPacket>>,
+}
+
+impl RequestQueue {
+    pub fn new() -> RequestQueue {
+        RequestQueue {
+            lock: RawSpinlock::INIT,
+            waiting_queue: Vec::new(),
+            processing_queue: Vec::new(),
+        }
+    }
+
+    ///  @brief 将请求包插入等待队列中
+    pub fn push_waiting_queue(
+        &mut self,
+        ahci_request_packet: BlockDeviceRequestPacket<AhciRequestPacket>,
+    ) {
+        self.waiting_queue.push(ahci_request_packet);
+    }
+
+    ///  @brief 将请求包从正在执行队列中弹出
+    pub fn pop_waiting_queue(&mut self) -> Option<BlockDeviceRequestPacket<AhciRequestPacket>> {
+        let mut res: Option<BlockDeviceRequestPacket<AhciRequestPacket>> = None;
+        if self.waiting_queue.len() == 0 {
+            return res;
+        }
+        res = Some(self.waiting_queue.remove(0));
+        return res;
+    }
+
+    ///  @brief 将请求包插入正在执行队列中
+    pub fn push_processing_queue(
+        &mut self,
+        ahci_request_packet: BlockDeviceRequestPacket<AhciRequestPacket>,
+    ) {
+        self.processing_queue.push(ahci_request_packet);
+    }
+
+    ///  @brief 将请求包从正在执行队列中弹出
+    pub fn pop_processing_queue(&mut self) -> Option<BlockDeviceRequestPacket<AhciRequestPacket>> {
+        let mut res: Option<BlockDeviceRequestPacket<AhciRequestPacket>> = None;
+        if self.processing_queue.len() == 0 {
+            return res;
+        }
+        res = Some(self.processing_queue.remove(0));
+        return res;
+    }
+
+    ///  @brief 将已完成请求包从执行队列中弹出
+    pub fn pop_finished_packets(&mut self) {
+        if self.processing_queue.len() != 0 {
+            compiler_fence(core::sync::atomic::Ordering::SeqCst);
+            //将状态设置为完成
+            mfence();
+            // 过滤器,过滤已完成的请求包
+            let filter = |packet: &mut BlockDeviceRequestPacket<AhciRequestPacket>| {
+                //判断请求是否完成
+                let res = unsafe {
+                    ahci_check_complete(
+                        packet.private_ahci_request_packet.port_num,
+                        packet.private_ahci_request_packet.ahci_ctrl_num,
+                        packet.private_ahci_request_packet.slot,
+                        null_mut(),
+                    )
+                };
+                // 完成则complete请求包
+                if res == 0 {
+                    unsafe {
+                        compiler_fence(core::sync::atomic::Ordering::SeqCst);
+                        complete(packet.status);
+                        compiler_fence(core::sync::atomic::Ordering::SeqCst);
+                    }
+                    return true;
+                }
+                return false;
+            };
+            self.processing_queue.drain_filter(filter);
+            mfence();
+            compiler_fence(core::sync::atomic::Ordering::SeqCst);
+        }
+    }
+}
+
+pub struct SchedulerIO {
+    io_queue: Vec<&'static mut RequestQueue>,
+}
+
+impl SchedulerIO {
+    pub fn new() -> SchedulerIO {
+        return SchedulerIO {
+            io_queue: Default::default(),
+        };
+    }
+}
+// io调度器全局指针
+pub static mut IO_SCHEDULER_PTR: *mut SchedulerIO = null_mut();
+
+#[inline]
+pub fn __get_io_scheduler() -> &'static mut SchedulerIO {
+    return unsafe { IO_SCHEDULER_PTR.as_mut().unwrap() };
+}
+
+/// @brief 初始化io调度器
+#[no_mangle]
+pub unsafe extern "C" fn block_io_scheduler_init_rust() {
+    if IO_SCHEDULER_PTR.is_null() {
+        IO_SCHEDULER_PTR = Box::leak(Box::new(SchedulerIO::new()));
+        create_io_queue();
+    } else {
+        kBUG!("Try to init IO Scheduler twice.");
+        panic!("Try to init IO Scheduler twice.");
+    }
+}
+
+/// @brief 初始化io请求队列
+#[no_mangle]
+pub extern "C" fn create_io_queue() {
+    let io_scheduler = __get_io_scheduler();
+    io_scheduler
+        .io_queue
+        .push(Box::leak(Box::new(RequestQueue::new())));
+}
+
+#[no_mangle]
+/// @brief 处理请求 (守护线程运行)
+pub extern "C" fn block_io_scheduler_address_requests() {
+    let io_scheduler = __get_io_scheduler();
+
+    compiler_fence(core::sync::atomic::Ordering::SeqCst);
+    //FIXME 暂时只考虑了一个io队列的情况
+    loop {
+        compiler_fence(core::sync::atomic::Ordering::SeqCst);
+
+        //请不要修改下面三个循环的顺序
+        let size = io_scheduler.io_queue[0].waiting_queue.len();
+        for i in 0..16 {
+            // 正在运行队列大小限制为16
+            if i >= size || io_scheduler.io_queue[0].processing_queue.len() == 16 {
+                break;
+            }
+            compiler_fence(core::sync::atomic::Ordering::SeqCst);
+            // if !io_scheduler.io_queue[0].lock.is_locked() {
+            io_scheduler.io_queue[0].lock.lock();
+            let mut packet = io_scheduler.io_queue[0].pop_waiting_queue().unwrap();
+            //将rust中的请求包转成c中的请求包
+            let mut ahci_packet: ahci_request_packet_t = convert_c_ahci_request(&packet);
+            let mut ret_slot: i8 = -1;
+            //分发请求包
+            unsafe {
+                compiler_fence(core::sync::atomic::Ordering::SeqCst);
+                ahci_query_disk(&mut ahci_packet, &mut ret_slot);
+                compiler_fence(core::sync::atomic::Ordering::SeqCst);
+            }
+            //获取请求运行的插槽
+            packet.private_ahci_request_packet.slot = ret_slot;
+            io_scheduler.io_queue[0].push_processing_queue(packet);
+            io_scheduler.io_queue[0].lock.unlock();
+            // }
+
+            compiler_fence(core::sync::atomic::Ordering::SeqCst);
+        }
+        compiler_fence(core::sync::atomic::Ordering::SeqCst);
+
+        //检查是否有完成的请求包
+        io_scheduler.io_queue[0].lock.lock();
+        io_scheduler.io_queue[0].pop_finished_packets();
+        io_scheduler.io_queue[0].lock.unlock();
+        mfence();
+        compiler_fence(core::sync::atomic::Ordering::SeqCst);
+    }
+}
+
+pub fn convert_c_ahci_request(
+    pakcet: &BlockDeviceRequestPacket<AhciRequestPacket>,
+) -> ahci_request_packet_t {
+    let ahci_packet: ahci_request_packet_t = ahci_request_packet_t {
+        ahci_ctrl_num: pakcet.private_ahci_request_packet.ahci_ctrl_num,
+        port_num: pakcet.private_ahci_request_packet.port_num,
+        blk_pak: block_device_request_packet {
+            LBA_start: pakcet.lba_start,
+            cmd: pakcet.cmd,
+            buffer_vaddr: pakcet.buffer_vaddr,
+            count: pakcet.count,
+            device_type: pakcet.device_type,
+            end_handler: pakcet.end_handler,
+        },
+    };
+    return ahci_packet;
+}
+
+/// @brief 将c中的ahci_request_packet_t转换成rust中的BlockDeviceRequestPacket<AhciRequestPacket>
+pub fn create_ahci_request(
+    ahci_request_packet: &ahci_request_packet_t,
+) -> BlockDeviceRequestPacket<AhciRequestPacket> {
+    let cmpl: *mut completion = unsafe { completion_alloc() };
+    let ahci_packet = AhciRequestPacket {
+        ahci_ctrl_num: ahci_request_packet.ahci_ctrl_num,
+        port_num: ahci_request_packet.port_num,
+        slot: -1,
+    };
+    let packet = BlockDeviceRequestPacket {
+        private_ahci_request_packet: ahci_packet,
+        buffer_vaddr: ahci_request_packet.blk_pak.buffer_vaddr,
+        cmd: ahci_request_packet.blk_pak.cmd,
+        count: ahci_request_packet.blk_pak.count,
+        device_type: ahci_request_packet.blk_pak.device_type,
+        end_handler: ahci_request_packet.blk_pak.end_handler,
+        lba_start: ahci_request_packet.blk_pak.LBA_start,
+        status: cmpl,
+    };
+
+    return packet;
+}
+
+#[no_mangle]
+/// @brief 将ahci的io请求插入等待队列中
+pub extern "C" fn ahci_push_request(ahci_request_packet: &ahci_request_packet_t) {
+    let packet = create_ahci_request(ahci_request_packet);
+    let io_scheduler = __get_io_scheduler();
+    let status = packet.status;
+    io_scheduler.io_queue[0].lock.lock();
+    io_scheduler.io_queue[0].push_waiting_queue(packet);
+    io_scheduler.io_queue[0].lock.unlock();
+    compiler_fence(core::sync::atomic::Ordering::SeqCst);
+    unsafe {
+        wait_for_completion(status);
+    }
+    compiler_fence(core::sync::atomic::Ordering::SeqCst);
+}

+ 1 - 0
kernel/src/io/block/mod.rs

@@ -0,0 +1 @@
+pub mod block_io_scheduler;

+ 1 - 0
kernel/src/io/mod.rs

@@ -0,0 +1 @@
+pub mod block;

+ 1 - 0
kernel/src/lib.rs

@@ -25,6 +25,7 @@ mod ipc;
 #[macro_use]
 mod libs;
 mod exception;
+pub mod io;
 mod mm;
 mod process;
 mod sched;

+ 1 - 1
kernel/src/libs/cpu.c

@@ -119,4 +119,4 @@ uint32_t cpu_get_core_crysral_freq()
     // kdebug("Cpu_cpuid_max_Basic_mop = %#03x, a=%ld, b=%ld, c=%ld, d=%ld", Cpu_cpuid_max_Basic_mop, a, b, c, d);
 
     return c;
-}
+}

+ 1 - 5
kernel/src/libs/semaphore.c

@@ -35,10 +35,6 @@ void semaphore_up(semaphore_t *sema)
         wait_queue_node_t *wq = container_of(list_next(&sema->wait_queue.wait_list), wait_queue_node_t, wait_list);
         list_del(&wq->wait_list);
 
-        wq->pcb->state = PROC_RUNNING;
-        sched_enqueue(wq->pcb);
-
-        // 当前进程缺少需要的资源,立即标为需要被调度
-        current_pcb->flags |= PF_NEED_SCHED;
+        process_wakeup_immediately(wq->pcb);
     }
 };

+ 1 - 2
kernel/src/libs/spinlock.rs

@@ -134,11 +134,10 @@ impl RawSpinlock {
     }
 
     /// @brief 恢复rflags以及中断状态并解锁自旋锁
-    pub fn unlock_irqrestore(&self, flags: &u64){
+    pub fn unlock_irqrestore(&self, flags: &u64) {
         self.unlock();
         local_irq_restore(flags);
     }
-
 }
 /// 实现了守卫的SpinLock, 能够支持内部可变性
 ///

+ 1 - 1
kernel/src/libs/wait_queue_head.c

@@ -77,6 +77,6 @@ void wait_queue_wakeup_on_stack(wait_queue_head_t *q, int64_t state)
     if (wait->pcb->state & state)
     {
         list_del_init(&wait->wait_list);
-        process_wakeup(wait->pcb);
+        process_wakeup_immediately(wait->pcb);
     }
 }

+ 11 - 5
kernel/src/process/proc-types.h

@@ -1,9 +1,9 @@
 #pragma once
 
+#include "ptrace.h"
 #include <DragonOS/signal.h>
-#include <common/wait_queue.h>
 #include <DragonOS/stdint.h>
-#include "ptrace.h"
+#include <common/wait_queue.h>
 
 // 进程最大可拥有的文件描述符数量
 #define PROC_MAX_FD_NUM 16
@@ -38,7 +38,7 @@
 #define CLONE_VM (1UL << 2)            // 在进程间共享虚拟内存空间
 #define CLONE_SIGHAND (1UL << 3)       // 克隆时,与父进程共享信号处理结构体
 #define CLONE_CLEAR_SIGHAND (1UL << 4) // 克隆时,将原本被设置为SIG_IGNORE的信号,设置回SIG_DEFAULT
-#define CLONE_THREAD (1UL << 5)       // 拷贝线程
+#define CLONE_THREAD (1UL << 5)        // 拷贝线程
 #define PCB_NAME_LEN 16
 
 struct thread_struct
@@ -69,6 +69,8 @@ struct thread_struct
 #define PF_EXITING (1UL << 5)  // 进程正在退出
 #define PF_WAKEKILL (1UL << 6) // 进程由于接收到终止信号唤醒
 #define PF_SIGNALED (1UL << 7) // 进程由于接收到信号而退出
+#define PF_NEED_MIGRATE (1UL << 8) // 进程需要迁移到其他的核心
+
 /**
  * @brief 进程控制块
  *
@@ -82,6 +84,7 @@ struct process_control_block
     int32_t preempt_count; // 持有的自旋锁的数量
 
     uint32_t cpu_id; // 当前进程在哪个CPU核心上运行
+    // pcb的名字
     char name[PCB_NAME_LEN];
 
     // 内存空间分布结构体, 记录内存页表和程序段信息
@@ -94,7 +97,7 @@ struct process_control_block
     struct List list;
 
     // todo:给pcb中加一个spinlock_t成员
-    //进程自旋锁
+    // 进程自旋锁
     //  spinlock_t alloc_lock;
 
     // 地址空间范围
@@ -105,7 +108,7 @@ struct process_control_block
     long pid;
     long priority;           // 优先级
     int64_t virtual_runtime; // 虚拟运行时间
-    int64_t rt_time_slice;  // 由实时调度器管理的时间片
+    int64_t rt_time_slice;   // 由实时调度器管理的时间片
 
     // 进程拥有的文件描述符的指针数组
     // todo: 改用动态指针数组
@@ -131,6 +134,9 @@ struct process_control_block
     // 正在等待的信号的标志位,表示某个信号正在等待处理
     struct sigpending sig_pending;
 
+    // 如果当前进程等待被迁移到另一个cpu核心上(也就是flags中的PF_NEED_MIGRATE被置位),
+    // 该字段存储要被迁移到的目标处理器核心号
+    uint32_t migrate_to;
 };
 
 // 将进程的pcb和内核栈融合到一起,8字节对齐

+ 13 - 16
kernel/src/process/process.c

@@ -6,6 +6,7 @@
 #include <common/elf.h>
 #include <common/kprint.h>
 #include <common/kthread.h>
+#include <common/lz4.h>
 #include <common/printk.h>
 #include <common/spinlock.h>
 #include <common/stdio.h>
@@ -23,15 +24,13 @@
 #include <filesystem/fat32/fat32.h>
 #include <filesystem/procfs/procfs.h>
 #include <filesystem/rootfs/rootfs.h>
+#include <io/block/block_io_scheduler.h>
 #include <ktest/ktest.h>
+#include <mm/mmio.h>
 #include <mm/slab.h>
 #include <sched/sched.h>
 #include <syscall/syscall.h>
 #include <syscall/syscall_num.h>
-
-#include <mm/mmio.h>
-
-#include <common/lz4.h>
 extern int __rust_demo_func();
 // #pragma GCC push_options
 // #pragma GCC optimize("O0")
@@ -499,6 +498,7 @@ ul initial_kernel_thread(ul arg)
 
     scm_enable_double_buffer();
 
+    block_io_scheduler_init();
     ahci_init();
     fat32_init();
     rootfs_umount();
@@ -529,7 +529,6 @@ ul initial_kernel_thread(ul arg)
     // struct process_control_block *test_rt1 = kthread_run_rt(&test, NULL, "test rt");
     // kdebug("process:rt test kthread is created!!!!");
 
-
     // 准备切换到用户态
     struct pt_regs *regs;
 
@@ -649,11 +648,6 @@ void process_init()
 
     initial_tss[proc_current_cpu_id].rsp0 = initial_thread.rbp;
 
-    /*
-    kdebug("initial_thread.rbp=%#018lx", initial_thread.rbp);
-    kdebug("initial_tss[0].rsp1=%#018lx", initial_tss[0].rsp1);
-    kdebug("initial_tss[0].ist1=%#018lx", initial_tss[0].ist1);
-*/
     // 初始化pid的写锁
 
     spin_init(&process_global_pid_write_lock);
@@ -714,7 +708,6 @@ struct process_control_block *process_find_pcb_by_pid(pid_t pid)
  */
 int process_wakeup(struct process_control_block *pcb)
 {
-    // kdebug("pcb pid = %#018lx", pcb->pid);
 
     BUG_ON(pcb == NULL);
     if (pcb == NULL)
@@ -724,8 +717,8 @@ int process_wakeup(struct process_control_block *pcb)
         return 0;
 
     pcb->state |= PROC_RUNNING;
-    sched_enqueue(pcb);
-    return 1;
+    sched_enqueue(pcb, true);
+    return 0;
 }
 
 /**
@@ -741,7 +734,13 @@ int process_wakeup_immediately(struct process_control_block *pcb)
     if (retval != 0)
         return retval;
     // 将当前进程标志为需要调度,缩短新进程被wakeup的时间
-    current_pcb->flags |= PF_NEED_SCHED;
+        current_pcb->flags |= PF_NEED_SCHED;
+
+    if (pcb->cpu_id == current_pcb->cpu_id)
+        sched();
+    else
+        kick_cpu(pcb->cpu_id);
+    return 0;
 }
 
 /**
@@ -802,7 +801,6 @@ uint64_t process_exit_mm(struct process_control_block *pcb)
         vma = cur_vma->vm_next;
 
         uint64_t pa;
-        // kdebug("vm start=%#018lx, sem=%d", cur_vma->vm_start, cur_vma->anon_vma->sem.counter);
         mm_unmap_vma(pcb->mm, cur_vma, &pa);
 
         uint64_t size = (cur_vma->vm_end - cur_vma->vm_start);
@@ -876,7 +874,6 @@ int process_fd_alloc(struct vfs_file_t *file)
 
     for (int i = 0; i < PROC_MAX_FD_NUM; ++i)
     {
-        // kdebug("currentpcb->fds[%d]=%#018lx", i, current_pcb->fds[i]);
         /* 找到指针数组中的空位 */
         if (current_pcb->fds[i] == NULL)
         {

+ 2 - 1
kernel/src/process/process.rs

@@ -56,8 +56,9 @@ pub extern "C" fn process_try_to_wake_up(
         // 可以wakeup
         unsafe {
             write_volatile(&mut pcb.state, PROC_RUNNING as u64);
-            sched_enqueue(pcb);
         }
+        sched_enqueue(pcb, true);
+
         retval = true;
     }
     // todo: 对pcb的pi_lock放锁

+ 43 - 6
kernel/src/sched/cfs.rs

@@ -9,6 +9,7 @@ use crate::{
     },
     kBUG,
     libs::spinlock::RawSpinlock,
+    smp::core::smp_get_processor_id,
 };
 
 use super::core::{sched_enqueue, Scheduler};
@@ -42,14 +43,17 @@ struct CFSQueue {
     lock: RawSpinlock,
     /// 进程的队列
     queue: Vec<&'static mut process_control_block>,
+    /// 当前核心的队列专属的IDLE进程的pcb
+    idle_pcb: *mut process_control_block,
 }
 
 impl CFSQueue {
-    pub fn new() -> CFSQueue {
+    pub fn new(idle_pcb: *mut process_control_block) -> CFSQueue {
         CFSQueue {
             cpu_exec_proc_jiffies: 0,
             lock: RawSpinlock::INIT,
             queue: Vec::new(),
+            idle_pcb: idle_pcb,
         }
     }
 
@@ -83,11 +87,22 @@ impl CFSQueue {
             res = self.queue.pop().unwrap();
         } else {
             // 如果队列为空,则返回IDLE进程的pcb
-            res = unsafe { &mut initial_proc_union.pcb };
+            res = unsafe { self.idle_pcb.as_mut().unwrap() };
         }
         self.lock.unlock();
         return res;
     }
+
+    /// @brief 获取cfs队列的最小运行时间
+    ///
+    /// @return Option<i64> 如果队列不为空,那么返回队列中,最小的虚拟运行时间;否则返回None
+    pub fn min_vruntime(&self) -> Option<i64> {
+        if !self.queue.is_empty() {
+            return Some(self.queue.first().unwrap().virtual_runtime);
+        } else {
+            return None;
+        }
+    }
 }
 
 /// @brief CFS调度器类
@@ -105,8 +120,12 @@ impl SchedulerCFS {
 
         // 为每个cpu核心创建队列
         for _ in 0..MAX_CPU_NUM {
-            result.cpu_queue.push(Box::leak(Box::new(CFSQueue::new())));
+            result
+                .cpu_queue
+                .push(Box::leak(Box::new(CFSQueue::new(null_mut()))));
         }
+        // 设置cpu0的pcb
+        result.cpu_queue[0].idle_pcb = unsafe { &mut initial_proc_union.pcb };
 
         return result;
     }
@@ -137,6 +156,22 @@ impl SchedulerCFS {
         // 更新当前进程的虚拟运行时间
         current_pcb().virtual_runtime += 1;
     }
+
+    /// @brief 将进程加入cpu的cfs调度队列,并且重设其虚拟运行时间为当前队列的最小值
+    pub fn enqueue_reset_vruntime(&mut self, pcb: &'static mut process_control_block) {
+        let cpu_queue = &mut self.cpu_queue[pcb.cpu_id as usize];
+        if cpu_queue.queue.len() > 0 {
+            pcb.virtual_runtime = cpu_queue.min_vruntime().unwrap();
+        }
+
+        cpu_queue.enqueue(pcb);
+    }
+
+    /// @brief 设置cpu的队列的IDLE进程的pcb
+    pub fn set_cpu_idle(&mut self, cpu_id: usize, pcb: *mut process_control_block) {
+        // kdebug!("set cpu idle: id={}", cpu_id);
+        self.cpu_queue[cpu_id].idle_pcb = pcb;
+    }
 }
 
 impl Scheduler for SchedulerCFS {
@@ -144,7 +179,8 @@ impl Scheduler for SchedulerCFS {
     /// 请注意,进入该函数之前,需要关中断
     fn sched(&mut self) -> Option<&'static mut process_control_block> {
         current_pcb().flags &= !(PF_NEED_SCHED as u64);
-        let current_cpu_id = current_pcb().cpu_id as usize;
+        let current_cpu_id = smp_get_processor_id() as usize;
+
         let current_cpu_queue: &mut CFSQueue = self.cpu_queue[current_cpu_id];
         let proc: &'static mut process_control_block = current_cpu_queue.dequeue();
         compiler_fence(core::sync::atomic::Ordering::SeqCst);
@@ -155,7 +191,7 @@ impl Scheduler for SchedulerCFS {
             compiler_fence(core::sync::atomic::Ordering::SeqCst);
             // 本次切换由于时间片到期引发,则再次加入就绪队列,否则交由其它功能模块进行管理
             if current_pcb().state & (PROC_RUNNING as u64) != 0 {
-                sched_enqueue(current_pcb());
+                sched_enqueue(current_pcb(), false);
                 compiler_fence(core::sync::atomic::Ordering::SeqCst);
             }
 
@@ -166,6 +202,7 @@ impl Scheduler for SchedulerCFS {
             }
 
             compiler_fence(core::sync::atomic::Ordering::SeqCst);
+
             return Some(proc);
         } else {
             // 不进行切换
@@ -177,7 +214,7 @@ impl Scheduler for SchedulerCFS {
             }
 
             compiler_fence(core::sync::atomic::Ordering::SeqCst);
-            sched_enqueue(proc);
+            sched_enqueue(proc, false);
             compiler_fence(core::sync::atomic::Ordering::SeqCst);
         }
         compiler_fence(core::sync::atomic::Ordering::SeqCst);

+ 11 - 1
kernel/src/sched/completion.c

@@ -204,7 +204,7 @@ void wait_for_multicompletion(struct completion x[], int n)
         {
             wait_for_completion(&x[i]);
         }
-        else if (!try_wait_for_completion(&x[i])) //上面测试过done>0,那么这里尝试去获取一个done,如果失败了,就继续wait
+        else if (!try_wait_for_completion(&x[i])) // 上面测试过done>0,那么这里尝试去获取一个done,如果失败了,就继续wait
         {
             wait_for_completion(&x[i]);
         }
@@ -325,4 +325,14 @@ void __test_completion()
     kfree(waiter_data);
     kfree(worker_data);
     // kdebug("completion test done.");
+}
+
+/**
+ * @brief rust 获取completion
+ */
+struct completion *completion_alloc()
+{
+    struct completion *cmpl = kzalloc(sizeof(struct completion), 0);
+    completion_init(cmpl);
+    return cmpl;
 }

+ 56 - 7
kernel/src/sched/core.rs

@@ -2,11 +2,14 @@ use core::sync::atomic::compiler_fence;
 
 use crate::{
     arch::asm::{current::current_pcb, ptrace::user_mode},
-    arch::context::switch_process,
+    arch::{
+        context::switch_process,
+        interrupt::{cli, sti},
+    },
     include::bindings::bindings::{
-        process_control_block, pt_regs, EPERM, PROC_RUNNING, SCHED_FIFO, SCHED_NORMAL, SCHED_RR,
+        process_control_block, pt_regs, EINVAL, EPERM, MAX_CPU_NUM, PF_NEED_MIGRATE, PROC_RUNNING,
+        SCHED_FIFO, SCHED_NORMAL, SCHED_RR,
     },
-    kdebug,
     process::process::process_cpu,
 };
 
@@ -46,7 +49,7 @@ fn __sched() -> Option<&'static mut process_control_block> {
             next = p;
             // kdebug!("next pcb is {}",next.pid);
             // rt_scheduler.enqueue_task_rt(next.priority as usize, next);
-            sched_enqueue(next);
+            sched_enqueue(next, false);
             return rt_scheduler.sched();
         }
         None => {
@@ -56,17 +59,34 @@ fn __sched() -> Option<&'static mut process_control_block> {
 }
 
 /// @brief 将进程加入调度队列
+///
+/// @param pcb 要被加入队列的pcb
+/// @param reset_time 是否重置虚拟运行时间
 #[allow(dead_code)]
 #[no_mangle]
-pub extern "C" fn sched_enqueue(pcb: &'static mut process_control_block) {
+pub extern "C" fn sched_enqueue(pcb: &'static mut process_control_block, mut reset_time: bool) {
     // 调度器不处理running位为0的进程
     if pcb.state & (PROC_RUNNING as u64) == 0 {
         return;
     }
     let cfs_scheduler = __get_cfs_scheduler();
     let rt_scheduler = __get_rt_scheduler();
+
+    compiler_fence(core::sync::atomic::Ordering::SeqCst);
+    if (pcb.flags & (PF_NEED_MIGRATE as u64)) != 0 {
+        // kdebug!("migrating pcb:{:?}", pcb);
+        pcb.flags &= !(PF_NEED_MIGRATE as u64);
+        pcb.cpu_id = pcb.migrate_to;
+        reset_time = true;
+    }
+    compiler_fence(core::sync::atomic::Ordering::SeqCst);
+
     if pcb.policy == SCHED_NORMAL {
-        cfs_scheduler.enqueue(pcb);
+        if reset_time {
+            cfs_scheduler.enqueue_reset_vruntime(pcb);
+        } else {
+            cfs_scheduler.enqueue(pcb);
+        }
     } else if pcb.policy == SCHED_FIFO || pcb.policy == SCHED_RR {
         rt_scheduler.enqueue(pcb);
     } else {
@@ -107,6 +127,7 @@ pub extern "C" fn sched_update_jiffies() {
 #[allow(dead_code)]
 #[no_mangle]
 pub extern "C" fn sys_sched(regs: &'static mut pt_regs) -> u64 {
+    cli();
     // 进行权限校验,拒绝用户态发起调度
     if user_mode(regs) {
         return (-(EPERM as i64)) as u64;
@@ -116,5 +137,33 @@ pub extern "C" fn sys_sched(regs: &'static mut pt_regs) -> u64 {
     if pcb.is_some() {
         switch_process(current_pcb(), pcb.unwrap());
     }
-    0
+    sti();
+    return 0;
+}
+
+#[allow(dead_code)]
+#[no_mangle]
+pub extern "C" fn sched_set_cpu_idle(cpu_id: usize, pcb: *mut process_control_block) {
+    __get_cfs_scheduler().set_cpu_idle(cpu_id, pcb);
+}
+
+/// @brief 设置进程需要等待迁移到另一个cpu核心。
+/// 当进程被重新加入队列时,将会更新其cpu_id,并加入正确的队列
+///
+/// @return i32 成功返回0,否则返回posix错误码
+#[allow(dead_code)]
+#[no_mangle]
+pub extern "C" fn sched_migrate_process(
+    pcb: &'static mut process_control_block,
+    target: usize,
+) -> i32 {
+    if target > MAX_CPU_NUM.try_into().unwrap() {
+        // panic!("sched_migrate_process: target > MAX_CPU_NUM");
+        return -(EINVAL as i32);
+    }
+
+    pcb.flags |= PF_NEED_MIGRATE as u64;
+    pcb.migrate_to = target as u32;
+    // kdebug!("pid:{} migrate to cpu:{}", pcb.pid, target);
+    return 0;
 }

+ 8 - 12
kernel/src/sched/rt.rs

@@ -4,10 +4,7 @@ use alloc::{boxed::Box, vec::Vec};
 
 use crate::{
     arch::asm::current::current_pcb,
-    include::bindings::bindings::{
-        initial_proc_union, process_control_block, PF_NEED_SCHED, SCHED_FIFO, SCHED_NORMAL,
-        SCHED_RR,
-    },
+    include::bindings::bindings::{process_control_block, PF_NEED_SCHED, SCHED_FIFO, SCHED_RR},
     kBUG, kdebug,
     libs::spinlock::RawSpinlock,
 };
@@ -73,12 +70,11 @@ impl RTQueue {
             res = Some(self.queue.pop().unwrap());
         } else {
             // 如果队列为空,则返回None
-            res=None;
+            res = None;
         }
         self.lock.unlock();
         return res;
     }
-
 }
 
 /// @brief RT调度器类
@@ -110,7 +106,7 @@ impl SchedulerRT {
         for i in 0..SchedulerRT::MAX_RT_PRIO {
             let cpu_queue_i: &mut RTQueue = self.cpu_queue[i as usize];
             let proc: Option<&'static mut process_control_block> = cpu_queue_i.dequeue();
-            if proc.is_some(){
+            if proc.is_some() {
                 return proc;
             }
         }
@@ -132,10 +128,10 @@ impl Scheduler for SchedulerRT {
         if proc.policy == SCHED_FIFO {
             // 如果挑选的进程优先级小于当前进程,则不进行切换
             if proc.priority <= current_pcb().priority {
-                sched_enqueue(proc);
+                sched_enqueue(proc, false);
             } else {
                 // 将当前的进程加进队列
-                sched_enqueue(current_pcb());
+                sched_enqueue(current_pcb(), false);
                 compiler_fence(core::sync::atomic::Ordering::SeqCst);
                 return Some(proc);
             }
@@ -148,19 +144,19 @@ impl Scheduler for SchedulerRT {
                 if proc.rt_time_slice <= 0 {
                     proc.rt_time_slice = SchedulerRT::RR_TIMESLICE;
                     proc.flags |= !(PF_NEED_SCHED as u64);
-                    sched_enqueue(proc);
+                    sched_enqueue(proc, false);
                 }
                 // 目标进程时间片未耗尽,切换到目标进程
                 else {
                     // 将当前进程加进队列
-                    sched_enqueue(current_pcb());
+                    sched_enqueue(current_pcb(), false);
                     compiler_fence(core::sync::atomic::Ordering::SeqCst);
                     return Some(proc);
                 }
             }
             // curr优先级更大,说明一定是实时进程,将所选进程入队列
             else {
-                sched_enqueue(proc);
+                sched_enqueue(proc, false);
             }
         }
         return None;

+ 3 - 1
kernel/src/sched/sched.h

@@ -62,6 +62,8 @@
 extern void sched_update_jiffies();
 extern void sched_init();
 extern void sched();
-extern void sched_enqueue(struct process_control_block *pcb);
+extern void sched_enqueue(struct process_control_block *pcb, bool reset_time);
+extern void sched_set_cpu_idle(uint64_t cpu_id, struct process_control_block *pcb);
+extern void sched_migrate_process(struct process_control_block *pcb, uint64_t target);
 
 void switch_proc(struct process_control_block *prev, struct process_control_block *proc);

+ 1 - 5
kernel/src/smp/core.rs

@@ -1,11 +1,7 @@
 /// @brief 获取当前的cpu id
 #[inline]
 pub fn smp_get_processor_id() -> u32 {
-    if cfg!(x86_64) {
-        return crate::arch::cpu::arch_current_apic_id() as u32;
-    } else {
-        255
-    }
+    return crate::arch::cpu::current_cpu_id() as u32;
 }
 
 #[inline]

+ 65 - 8
kernel/src/smp/smp.c

@@ -12,13 +12,18 @@
 
 #include "ipi.h"
 
+static void __smp_kick_cpu_handler(uint64_t irq_num, uint64_t param, struct pt_regs *regs);
+
 static spinlock_t multi_core_starting_lock = {1}; // 多核启动锁
 
 static struct acpi_Processor_Local_APIC_Structure_t *proc_local_apic_structs[MAX_SUPPORTED_PROCESSOR_NUM];
 static uint32_t total_processor_num = 0;
-int current_starting_cpu = 0;
+static int current_starting_cpu = 0;
+
+static int num_cpu_started = 1;
 
-int num_cpu_started = 1;
+// kick cpu 功能所使用的中断向量号
+#define KICK_CPU_IRQ_NUM 0xc8
 
 void smp_init()
 {
@@ -49,7 +54,10 @@ void smp_init()
     ipi_send_IPI(DEST_PHYSICAL, IDLE, ICR_LEVEL_DE_ASSERT, EDGE_TRIGGER, 0x00, ICR_INIT, ICR_ALL_EXCLUDE_Self, 0x00);
 
     kdebug("total_processor_num=%d", total_processor_num);
-    kdebug("rflags=%#018lx", get_rflags());
+    // 注册接收kick_cpu功能的处理函数。(向量号200)
+    ipi_regiserIPI(KICK_CPU_IRQ_NUM, NULL, &__smp_kick_cpu_handler, NULL, NULL, "IPI kick cpu");
+
+    int core_to_start = 0;
     // total_processor_num = 3;
     for (int i = 0; i < total_processor_num; ++i) // i从1开始,不初始化bsp
     {
@@ -61,15 +69,16 @@ void smp_init()
                proc_local_apic_structs[i]->flags);
         if (proc_local_apic_structs[i]->local_apic_id == 0)
         {
-            --total_processor_num;
+            // --total_processor_num;
             continue;
         }
         if (!((proc_local_apic_structs[i]->flags & 0x1) || (proc_local_apic_structs[i]->flags & 0x2)))
         {
-            --total_processor_num;
+            // --total_processor_num;
             kdebug("processor %d cannot be enabled.", proc_local_apic_structs[i]->ACPI_Processor_UID);
             continue;
         }
+        ++core_to_start;
         // continue;
         io_mfence();
         spin_lock(&multi_core_starting_lock);
@@ -114,7 +123,7 @@ void smp_init()
                      proc_local_apic_structs[i]->local_apic_id);
     }
     io_mfence();
-    while (num_cpu_started != total_processor_num)
+    while (num_cpu_started != (core_to_start + 1))
         pause();
 
     kinfo("Cleaning page table remapping...\n");
@@ -162,8 +171,8 @@ void smp_ap_start()
     current_pcb->virtual_runtime = 0;
 
     current_pcb->thread = (struct thread_struct *)(current_pcb + 1); // 将线程结构体放置在pcb后方
-    current_pcb->thread->rbp = _stack_start;
-    current_pcb->thread->rsp = _stack_start;
+    current_pcb->thread->rbp = cpu_core_info[current_starting_cpu].stack_start;
+    current_pcb->thread->rsp = cpu_core_info[current_starting_cpu].stack_start;
     current_pcb->thread->fs = KERNEL_DS;
     current_pcb->thread->gs = KERNEL_DS;
     current_pcb->cpu_id = current_starting_cpu;
@@ -173,14 +182,22 @@ void smp_ap_start()
     load_TR(10 + current_starting_cpu * 2);
     current_pcb->preempt_count = 0;
 
+    sched_set_cpu_idle(current_starting_cpu, current_pcb);
+
     io_mfence();
     spin_unlock(&multi_core_starting_lock);
     preempt_disable(); // 由于ap处理器的pcb与bsp的不同,因此ap处理器放锁时,需要手动恢复preempt count
     io_mfence();
+    current_pcb->flags |= PF_NEED_SCHED;
     sti();
+    apic_timer_ap_core_init();
+    sched();
 
     while (1)
+    {
+        // kdebug("123");
         hlt();
+    }
 
     while (1)
     {
@@ -189,3 +206,43 @@ void smp_ap_start()
     while (1) // 这里要循环hlt,原因是当收到中断后,核心会被唤醒,处理完中断之后不会自动hlt
         hlt();
 }
+
+/**
+ * @brief kick_cpu 核心间通信的处理函数
+ *
+ * @param irq_num
+ * @param param
+ * @param regs
+ */
+static void __smp_kick_cpu_handler(uint64_t irq_num, uint64_t param, struct pt_regs *regs)
+{
+    if (user_mode(regs))
+        return;
+    sched();
+}
+
+/**
+ * @brief 使得指定的cpu核心立即运行调度
+ *
+ * @param cpu_id cpu核心号
+ */
+int kick_cpu(uint32_t cpu_id)
+{
+    if (cpu_id >= MAX_CPU_NUM)
+    {
+        return -EINVAL;
+    }
+    ipi_send_IPI(DEST_PHYSICAL, IDLE, ICR_LEVEL_DE_ASSERT, EDGE_TRIGGER, KICK_CPU_IRQ_NUM, ICR_APIC_FIXED,
+                 ICR_ALL_EXCLUDE_Self, 0);
+    return 0;
+}
+
+/**
+ * @brief 获取当前全部的cpu数目
+ *
+ * @return uint32_t
+ */
+uint32_t smp_get_total_cpu()
+{
+    return num_cpu_started;
+}

+ 4 - 0
kernel/src/smp/smp.h

@@ -16,3 +16,7 @@ extern uchar _apu_boot_end[];
  *
  */
 void smp_init();
+
+int kick_cpu(uint32_t cpu_id);
+
+uint32_t smp_get_total_cpu();

+ 2 - 3
kernel/src/syscall/syscall.c

@@ -578,7 +578,7 @@ uint64_t sys_nanosleep(struct pt_regs *regs)
 
 ul sys_ahci_end_req(struct pt_regs *regs)
 {
-    ahci_end_request();
+    // ahci_end_request();
     return 0;
 }
 
@@ -619,6 +619,5 @@ system_call_t system_call_table[MAX_SYSTEM_CALL_NUM] = {
     [25] = sys_rt_sigreturn,
     [26] = sys_getpid,
     [27] = sys_sched,
-    [28 ... 254] = system_call_not_exists,
-    [255] = sys_ahci_end_req,
+    [28 ... 255] = system_call_not_exists,
 };