process.c 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179
  1. #include "process.h"
  2. #include <common/printk.h>
  3. #include <common/kprint.h>
  4. #include <common/stdio.h>
  5. #include <common/string.h>
  6. #include <common/compiler.h>
  7. #include <common/libELF/elf.h>
  8. #include <common/time.h>
  9. #include <common/sys/wait.h>
  10. #include <driver/video/video.h>
  11. #include <driver/usb/usb.h>
  12. #include <exception/gate.h>
  13. #include <filesystem/fat32/fat32.h>
  14. #include <filesystem/devfs/devfs.h>
  15. #include <filesystem/rootfs/rootfs.h>
  16. #include <mm/slab.h>
  17. #include <common/spinlock.h>
  18. #include <syscall/syscall.h>
  19. #include <syscall/syscall_num.h>
  20. #include <sched/sched.h>
  21. #include <common/unistd.h>
  22. #include <debug/traceback/traceback.h>
  23. #include <driver/disk/ahci/ahci.h>
  24. #include <ktest/ktest.h>
  25. #include <mm/mmio.h>
  26. #include <common/lz4.h>
  27. // #pragma GCC push_options
  28. // #pragma GCC optimize("O0")
  29. spinlock_t process_global_pid_write_lock; // write lock held while the global pid counter is incremented
  30. long process_global_pid = 1; // global pid counter; do_fork() assigns its current value to the new process, then increments it
  31. extern void system_call(void);
  32. extern void kernel_thread_func(void);
  33. ul _stack_start; // stack base address of the initial proc (virtual address)
  34. extern struct mm_struct initial_mm;
  // Saved thread context of the initial process. rbp and rsp both start at
  // initial_proc_union.stack + STACK_SIZE / sizeof(ul) -- presumably the top of the
  // stack array, assuming `stack` is an array of ul (TODO confirm in process.h).
  35. struct thread_struct initial_thread =
  36. {
  37. .rbp = (ul)(initial_proc_union.stack + STACK_SIZE / sizeof(ul)),
  38. .rsp = (ul)(initial_proc_union.stack + STACK_SIZE / sizeof(ul)),
  39. .fs = KERNEL_DS,
  40. .gs = KERNEL_DS,
  41. .cr2 = 0,
  42. .trap_num = 0,
  43. .err_code = 0};
  44. // Initialize the union of the initial process and place it in the .data.init_proc_union section
  45. union proc_union initial_proc_union __attribute__((__section__(".data.init_proc_union"))) = {INITIAL_PROC(initial_proc_union.pcb)};
  // Per-CPU table of initial-process PCBs; only slot 0 is populated here.
  46. struct process_control_block *initial_proc[MAX_CPU_NUM] = {&initial_proc_union.pcb, 0};
  47. // Initialize the TSS of the initial process for every core
  48. struct tss_struct initial_tss[MAX_CPU_NUM] = {[0 ... MAX_CPU_NUM - 1] = INITIAL_TSS};
  49. /**
  50. * @brief Copy the flags of the current process
  51. *
  52. * @param clone_flags clone flags
  53. * @param pcb pcb of the new process
  54. * @return uint64_t
  55. */
  56. uint64_t process_copy_flags(uint64_t clone_flags, struct process_control_block *pcb);
  57. /**
  58. * @brief Copy the file descriptors (and related information) of the current process
  59. *
  60. * @param clone_flags clone flags
  61. * @param pcb pcb of the new process
  62. * @return uint64_t
  63. */
  64. uint64_t process_copy_files(uint64_t clone_flags, struct process_control_block *pcb);
  65. /**
  66. * @brief Release all file descriptors of a process
  67. *
  68. * @param pcb pcb of the process being reclaimed
  69. * @return uint64_t
  70. */
  71. uint64_t process_exit_files(struct process_control_block *pcb);
  72. /**
  73. * @brief Copy the memory-layout structure (mm_struct) of the current process
  74. *
  75. * @param clone_flags clone flags
  76. * @param pcb pcb of the new process
  77. * @return uint64_t
  78. */
  79. uint64_t process_copy_mm(uint64_t clone_flags, struct process_control_block *pcb);
  80. /**
  81. * @brief Release the page tables of a process
  82. *
  83. * @param pcb process whose page tables are released
  84. * @return uint64_t
  85. */
  86. uint64_t process_exit_mm(struct process_control_block *pcb);
  87. /**
  88. * @brief Copy the thread structure of the current process
  89. *
  90. * @param clone_flags clone flags
  91. * @param pcb pcb of the new process
   * @param stack_start forwarded unchanged from do_fork(); presumably the stack start address of the new thread (definition not visible here -- TODO confirm)
   * @param stack_size forwarded unchanged from do_fork(); presumably the stack size (TODO confirm)
   * @param current_regs register frame passed to do_fork()
  92. * @return uint64_t
  93. */
  94. uint64_t process_copy_thread(uint64_t clone_flags, struct process_control_block *pcb, uint64_t stack_start, uint64_t stack_size, struct pt_regs *current_regs);
  // Release counterpart of process_copy_thread(); in this file it is called from
  // process_do_exit() and from the failure path of do_fork().
  95. void process_exit_thread(struct process_control_block *pcb);
  96. /**
  97. * @brief Switch between processes
  98. *
  99. * @param prev pcb of the previous process
  100. * @param next pcb of the process being switched to
  101. * Registers were already saved when the program entered the kernel, so they do not need to be saved here.
  102. * Only the fs and gs registers are switched here.
  103. */
  104. #pragma GCC push_options
  105. #pragma GCC optimize("O0")
  106. void __switch_to(struct process_control_block *prev, struct process_control_block *next)
  107. {
  // Point the current CPU's TSS rsp0 at the incoming thread's saved rbp.
  108. initial_tss[proc_current_cpu_id].rsp0 = next->thread->rbp;
  109. // kdebug("next_rsp = %#018lx ", next->thread->rsp);
  110. // set_tss64((uint *)phys_2_virt(TSS64_Table), initial_tss[0].rsp0, initial_tss[0].rsp1, initial_tss[0].rsp2, initial_tss[0].ist1,
  111. // initial_tss[0].ist2, initial_tss[0].ist3, initial_tss[0].ist4, initial_tss[0].ist5, initial_tss[0].ist6, initial_tss[0].ist7);
  // Save the outgoing thread's fs and gs selectors into its thread structure ...
  112. __asm__ __volatile__("movq %%fs, %0 \n\t"
  113. : "=a"(prev->thread->fs));
  114. __asm__ __volatile__("movq %%gs, %0 \n\t"
  115. : "=a"(prev->thread->gs));
  // ... then load the incoming thread's fs and gs selectors.
  116. __asm__ __volatile__("movq %0, %%fs \n\t" ::"a"(next->thread->fs));
  117. __asm__ __volatile__("movq %0, %%gs \n\t" ::"a"(next->thread->gs));
  118. }
  119. #pragma GCC pop_options
  120. /**
  121. * @brief 打开要执行的程序文件
  122. *
  123. * @param path
  124. * @return struct vfs_file_t*
  125. */
  126. struct vfs_file_t *process_open_exec_file(char *path)
  127. {
  128. struct vfs_dir_entry_t *dentry = NULL;
  129. struct vfs_file_t *filp = NULL;
  130. dentry = vfs_path_walk(path, 0);
  131. if (dentry == NULL)
  132. return (void *)-ENOENT;
  133. if (dentry->dir_inode->attribute == VFS_IF_DIR)
  134. return (void *)-ENOTDIR;
  135. filp = (struct vfs_file_t *)kmalloc(sizeof(struct vfs_file_t), 0);
  136. if (filp == NULL)
  137. return (void *)-ENOMEM;
  138. filp->position = 0;
  139. filp->mode = 0;
  140. filp->dEntry = dentry;
  141. filp->mode = ATTR_READ_ONLY;
  142. filp->file_ops = dentry->dir_inode->file_ops;
  143. return filp;
  144. }
  145. /**
  146. * @brief 加载elf格式的程序文件到内存中,并设置regs
  147. *
  148. * @param regs 寄存器
  149. * @param path 文件路径
  150. * @return int
  151. */
  152. static int process_load_elf_file(struct pt_regs *regs, char *path)
  153. {
  154. int retval = 0;
  155. struct vfs_file_t *filp = process_open_exec_file(path);
  156. if ((long)filp <= 0 && (long)filp >= -255)
  157. {
  158. // kdebug("(long)filp=%ld", (long)filp);
  159. return (unsigned long)filp;
  160. }
  161. void *buf = kmalloc(PAGE_4K_SIZE, 0);
  162. memset(buf, 0, PAGE_4K_SIZE);
  163. uint64_t pos = 0;
  164. pos = filp->file_ops->lseek(filp, 0, SEEK_SET);
  165. retval = filp->file_ops->read(filp, (char *)buf, sizeof(Elf64_Ehdr), &pos);
  166. retval = 0;
  167. if (!elf_check(buf))
  168. {
  169. kerror("Not an ELF file: %s", path);
  170. retval = -ENOTSUP;
  171. goto load_elf_failed;
  172. }
  173. #if ARCH(X86_64)
  174. // 暂时只支持64位的文件
  175. if (((Elf32_Ehdr *)buf)->e_ident[EI_CLASS] != ELFCLASS64)
  176. {
  177. kdebug("((Elf32_Ehdr *)buf)->e_ident[EI_CLASS]=%d", ((Elf32_Ehdr *)buf)->e_ident[EI_CLASS]);
  178. retval = -EUNSUPPORTED;
  179. goto load_elf_failed;
  180. }
  181. Elf64_Ehdr ehdr = *(Elf64_Ehdr *)buf;
  182. // 暂时只支持AMD64架构
  183. if (ehdr.e_machine != EM_AMD64)
  184. {
  185. kerror("e_machine=%d", ehdr.e_machine);
  186. retval = -EUNSUPPORTED;
  187. goto load_elf_failed;
  188. }
  189. #else
  190. #error Unsupported architecture!
  191. #endif
  192. if (ehdr.e_type != ET_EXEC)
  193. {
  194. kerror("Not executable file! filename=%s\tehdr->e_type=%d", path, ehdr.e_type);
  195. retval = -EUNSUPPORTED;
  196. goto load_elf_failed;
  197. }
  198. // kdebug("filename=%s:\te_entry=%#018lx", path, ehdr.e_entry);
  199. regs->rip = ehdr.e_entry;
  200. current_pcb->mm->code_addr_start = ehdr.e_entry;
  201. // kdebug("ehdr.e_phoff=%#018lx\t ehdr.e_phentsize=%d, ehdr.e_phnum=%d", ehdr.e_phoff, ehdr.e_phentsize, ehdr.e_phnum);
  202. // 将指针移动到program header处
  203. pos = ehdr.e_phoff;
  204. // 读取所有的phdr
  205. pos = filp->file_ops->lseek(filp, pos, SEEK_SET);
  206. filp->file_ops->read(filp, (char *)buf, (uint64_t)ehdr.e_phentsize * (uint64_t)ehdr.e_phnum, &pos);
  207. if ((unsigned long)filp <= 0)
  208. {
  209. kdebug("(unsigned long)filp=%d", (long)filp);
  210. retval = -ENOEXEC;
  211. goto load_elf_failed;
  212. }
  213. Elf64_Phdr *phdr = buf;
  214. // 将程序加载到内存中
  215. for (int i = 0; i < ehdr.e_phnum; ++i, ++phdr)
  216. {
  217. // kdebug("phdr[%d] phdr->p_offset=%#018lx phdr->p_vaddr=%#018lx phdr->p_memsz=%ld phdr->p_filesz=%ld phdr->p_type=%d", i, phdr->p_offset, phdr->p_vaddr, phdr->p_memsz, phdr->p_filesz, phdr->p_type);
  218. // 不是可加载的段
  219. if (phdr->p_type != PT_LOAD)
  220. continue;
  221. int64_t remain_mem_size = phdr->p_memsz;
  222. int64_t remain_file_size = phdr->p_filesz;
  223. pos = phdr->p_offset;
  224. uint64_t virt_base = 0;
  225. uint64_t beginning_offset = 0; // 由于页表映射导致的virtbase与实际的p_vaddr之间的偏移量
  226. if (remain_mem_size >= PAGE_2M_SIZE) // 接下来存在映射2M页的情况,因此将vaddr按2M向下对齐
  227. virt_base = phdr->p_vaddr & PAGE_2M_MASK;
  228. else // 接下来只有4K页的映射
  229. virt_base = phdr->p_vaddr & PAGE_4K_MASK;
  230. beginning_offset = phdr->p_vaddr - virt_base;
  231. remain_mem_size += beginning_offset;
  232. while (remain_mem_size > 0)
  233. {
  234. // kdebug("loading...");
  235. int64_t map_size = 0;
  236. if (remain_mem_size >= PAGE_2M_SIZE)
  237. {
  238. uint64_t pa = alloc_pages(ZONE_NORMAL, 1, PAGE_PGT_MAPPED)->addr_phys;
  239. struct vm_area_struct *vma = NULL;
  240. int ret = mm_create_vma(current_pcb->mm, virt_base, PAGE_2M_SIZE, VM_USER | VM_ACCESS_FLAGS, NULL, &vma);
  241. // 防止内存泄露
  242. if (ret == -EEXIST)
  243. free_pages(Phy_to_2M_Page(pa), 1);
  244. else
  245. mm_map_vma(vma, pa);
  246. io_mfence();
  247. memset((void *)virt_base, 0, PAGE_2M_SIZE);
  248. map_size = PAGE_2M_SIZE;
  249. }
  250. else
  251. {
  252. // todo: 使用4K、8K、32K大小内存块混合进行分配,提高空间利用率(减少了bmp的大小)
  253. map_size = ALIGN(remain_mem_size, PAGE_4K_SIZE);
  254. // 循环分配4K大小内存块
  255. for (uint64_t off = 0; off < map_size; off += PAGE_4K_SIZE)
  256. {
  257. uint64_t paddr = virt_2_phys((uint64_t)kmalloc(PAGE_4K_SIZE, 0));
  258. struct vm_area_struct *vma = NULL;
  259. int val = mm_create_vma(current_pcb->mm, virt_base + off, PAGE_4K_SIZE, VM_USER | VM_ACCESS_FLAGS, NULL, &vma);
  260. if (val == -EEXIST)
  261. kfree(phys_2_virt(paddr));
  262. else
  263. mm_map_vma(vma, paddr);
  264. io_mfence();
  265. memset((void *)(virt_base + off), 0, PAGE_4K_SIZE);
  266. }
  267. }
  268. pos = filp->file_ops->lseek(filp, pos, SEEK_SET);
  269. int64_t val = 0;
  270. if (remain_file_size > 0)
  271. {
  272. int64_t to_trans = (remain_file_size > PAGE_2M_SIZE) ? PAGE_2M_SIZE : remain_file_size;
  273. val = filp->file_ops->read(filp, (char *)(virt_base + beginning_offset), to_trans, &pos);
  274. }
  275. if (val < 0)
  276. goto load_elf_failed;
  277. remain_mem_size -= map_size;
  278. remain_file_size -= val;
  279. virt_base += map_size;
  280. }
  281. }
  282. // 分配2MB的栈内存空间
  283. regs->rsp = current_pcb->mm->stack_start;
  284. regs->rbp = current_pcb->mm->stack_start;
  285. {
  286. struct vm_area_struct *vma = NULL;
  287. uint64_t pa = alloc_pages(ZONE_NORMAL, 1, PAGE_PGT_MAPPED)->addr_phys;
  288. int val = mm_create_vma(current_pcb->mm, current_pcb->mm->stack_start - PAGE_2M_SIZE, PAGE_2M_SIZE, VM_USER | VM_ACCESS_FLAGS, NULL, &vma);
  289. if (val == -EEXIST)
  290. free_pages(Phy_to_2M_Page(pa), 1);
  291. else
  292. mm_map_vma(vma, pa);
  293. }
  294. // 清空栈空间
  295. memset((void *)(current_pcb->mm->stack_start - PAGE_2M_SIZE), 0, PAGE_2M_SIZE);
  296. load_elf_failed:;
  297. if (buf != NULL)
  298. kfree(buf);
  299. return retval;
  300. }
  301. /**
  302. * @brief 使当前进程去执行新的代码
  303. *
  304. * @param regs 当前进程的寄存器
  305. * @param path 可执行程序的路径
  306. * @param argv 参数列表
  307. * @param envp 环境变量
  308. * @return ul 错误码
  309. */
  310. #pragma GCC push_options
  311. #pragma GCC optimize("O0")
  312. ul do_execve(struct pt_regs *regs, char *path, char *argv[], char *envp[])
  313. {
  314. // kdebug("do_execve is running...");
  315. // 当前进程正在与父进程共享地址空间,需要创建
  316. // 独立的地址空间才能使新程序正常运行
  317. if (current_pcb->flags & PF_VFORK)
  318. {
  319. kdebug("proc:%d creating new mem space", current_pcb->pid);
  320. // 分配新的内存空间分布结构体
  321. struct mm_struct *new_mms = (struct mm_struct *)kmalloc(sizeof(struct mm_struct), 0);
  322. memset(new_mms, 0, sizeof(struct mm_struct));
  323. current_pcb->mm = new_mms;
  324. // 分配顶层页表, 并设置顶层页表的物理地址
  325. new_mms->pgd = (pml4t_t *)virt_2_phys(kmalloc(PAGE_4K_SIZE, 0));
  326. // 由于高2K部分为内核空间,在接下来需要覆盖其数据,因此不用清零
  327. memset(phys_2_virt(new_mms->pgd), 0, PAGE_4K_SIZE / 2);
  328. // 拷贝内核空间的页表指针
  329. memcpy(phys_2_virt(new_mms->pgd) + 256, phys_2_virt(initial_proc[proc_current_cpu_id]) + 256, PAGE_4K_SIZE / 2);
  330. }
  331. // 设置用户栈和用户堆的基地址
  332. unsigned long stack_start_addr = 0x6ffff0a00000UL;
  333. const uint64_t brk_start_addr = 0x700000000000UL;
  334. process_switch_mm(current_pcb);
  335. // 为用户态程序设置地址边界
  336. if (!(current_pcb->flags & PF_KTHREAD))
  337. current_pcb->addr_limit = USER_MAX_LINEAR_ADDR;
  338. current_pcb->mm->code_addr_end = 0;
  339. current_pcb->mm->data_addr_start = 0;
  340. current_pcb->mm->data_addr_end = 0;
  341. current_pcb->mm->rodata_addr_start = 0;
  342. current_pcb->mm->rodata_addr_end = 0;
  343. current_pcb->mm->bss_start = 0;
  344. current_pcb->mm->bss_end = 0;
  345. current_pcb->mm->brk_start = brk_start_addr;
  346. current_pcb->mm->brk_end = brk_start_addr;
  347. current_pcb->mm->stack_start = stack_start_addr;
  348. // 关闭之前的文件描述符
  349. process_exit_files(current_pcb);
  350. // 清除进程的vfork标志位
  351. current_pcb->flags &= ~PF_VFORK;
  352. // 加载elf格式的可执行文件
  353. int tmp = process_load_elf_file(regs, path);
  354. if (tmp < 0)
  355. goto exec_failed;
  356. // 拷贝参数列表
  357. if (argv != NULL)
  358. {
  359. int argc = 0;
  360. // 目标程序的argv基地址指针,最大8个参数
  361. char **dst_argv = (char **)(stack_start_addr - (sizeof(char **) << 3));
  362. uint64_t str_addr = (uint64_t)dst_argv;
  363. for (argc = 0; argc < 8 && argv[argc] != NULL; ++argc)
  364. {
  365. if (*argv[argc] == NULL)
  366. break;
  367. // 测量参数的长度(最大1023)
  368. int argv_len = strnlen_user(argv[argc], 1023) + 1;
  369. strncpy((char *)(str_addr - argv_len), argv[argc], argv_len - 1);
  370. str_addr -= argv_len;
  371. dst_argv[argc] = (char *)str_addr;
  372. // 字符串加上结尾字符
  373. ((char *)str_addr)[argv_len] = '\0';
  374. }
  375. // 重新设定栈基址,并预留空间防止越界
  376. stack_start_addr = str_addr - 8;
  377. current_pcb->mm->stack_start = stack_start_addr;
  378. regs->rsp = regs->rbp = stack_start_addr;
  379. // 传递参数
  380. regs->rdi = argc;
  381. regs->rsi = (uint64_t)dst_argv;
  382. }
  383. // kdebug("execve ok");
  384. regs->cs = USER_CS | 3;
  385. regs->ds = USER_DS | 3;
  386. regs->ss = USER_DS | 0x3;
  387. regs->rflags = 0x200246;
  388. regs->rax = 1;
  389. regs->es = 0;
  390. return 0;
  391. exec_failed:;
  392. process_do_exit(tmp);
  393. }
  394. #pragma GCC pop_options
  395. /**
  396. * @brief Kernel init process
  397. *
  398. * Initializes AHCI and FAT32, starts the USB driver and the kernel unit tests in
  * separate kernel threads, waits for them, and then turns itself into the first
  * user-mode process by jumping into do_execve() for "/shell.elf".
  *
  * @param arg argument passed by the creator (not used in the body)
  399. * @return ul nominally 1; the final asm jumps away to do_execve
  400. */
  401. #pragma GCC push_options
  402. #pragma GCC optimize("O0")
  403. ul initial_kernel_thread(ul arg)
  404. {
  405. // kinfo("initial proc running...\targ:%#018lx", arg);
  406. ahci_init();
  407. fat32_init();
  408. rootfs_umount();
  409. // Use a separate kernel thread to initialize the usb driver
  410. int usb_pid = kernel_thread(usb_init, 0, 0);
  411. kinfo("LZ4 lib Version=%s", LZ4_versionString());
  412. // Run unit tests for some components
  413. uint64_t tpid[] = {
  414. ktest_start(ktest_test_bitree, 0),
  415. ktest_start(ktest_test_kfifo, 0),
  416. ktest_start(ktest_test_mutex, 0),
  417. usb_pid,
  418. };
  419. kinfo("Waiting test thread exit...");
  420. // Wait for the test processes (and the usb init thread) to exit
  421. for (int i = 0; i < sizeof(tpid) / sizeof(uint64_t); ++i)
  422. waitpid(tpid[i], NULL, NULL);
  423. kinfo("All test done.");
  424. // Prepare to switch to user mode
  425. struct pt_regs *regs;
  426. // If an interrupt fires during the code below, returning from it would use wrong segment selectors and raise #GP, so cli is required here
  427. cli();
  // The saved rip becomes the return address pushed by the asm below, and the
  // saved rsp leaves room for one pt_regs frame at the top of this PCB's stack area.
  428. current_pcb->thread->rip = (ul)ret_from_system_call;
  429. current_pcb->thread->rsp = (ul)current_pcb + STACK_SIZE - sizeof(struct pt_regs);
  430. current_pcb->thread->fs = USER_DS | 0x3;
  431. barrier();
  432. current_pcb->thread->gs = USER_DS | 0x3;
  433. // Voluntarily give up the kernel-thread identity
  434. current_pcb->flags &= (~PF_KTHREAD);
  435. kdebug("in initial_kernel_thread: flags=%ld", current_pcb->flags);
  436. regs = (struct pt_regs *)current_pcb->thread->rsp;
  437. // kdebug("current_pcb->thread->rsp=%#018lx", current_pcb->thread->rsp);
  // All remaining flags are cleared as well.
  438. current_pcb->flags = 0;
  439. // Push the code address used to return to user mode onto the stack, pass the address of regs, then jmp to do_execve (the handler behind the execve system call). The design mirrors switch_proc
  440. // Load the user-mode program: shell.elf
  // NOTE(review): init_path and addr are not referenced by the asm below, which
  // passes the string literal directly through the "S" operand.
  441. char init_path[] = "/shell.elf";
  442. uint64_t addr = (uint64_t)&init_path;
  // Operands: %0 ("D", rdi) = saved rsp, i.e. the pt_regs address; %1 = saved rsp
  // (memory); %2 = saved rip (memory); "S" (rsi) = path; "c" (rcx) and "d" (rdx) = NULL.
  // The asm loads rsp from %1, pushes %2 as the return address and jumps to do_execve.
  443. __asm__ __volatile__("movq %1, %%rsp \n\t"
  444. "pushq %2 \n\t"
  445. "jmp do_execve \n\t" ::"D"(current_pcb->thread->rsp),
  446. "m"(current_pcb->thread->rsp), "m"(current_pcb->thread->rip), "S"("/shell.elf"), "c"(NULL), "d"(NULL)
  447. : "memory");
  448. return 1;
  449. }
  450. #pragma GCC pop_options
  451. /**
  452. * @brief 当子进程退出后向父进程发送通知
  453. *
  454. */
  455. void process_exit_notify()
  456. {
  457. wait_queue_wakeup(&current_pcb->parent_pcb->wait_child_proc_exit, PROC_INTERRUPTIBLE);
  458. }
  459. /**
  460. * @brief 进程退出时执行的函数
  461. *
  462. * @param code 返回码
  463. * @return ul
  464. */
  465. ul process_do_exit(ul code)
  466. {
  467. // kinfo("process exiting..., code is %ld.", (long)code);
  468. cli();
  469. struct process_control_block *pcb = current_pcb;
  470. // 进程退出时释放资源
  471. process_exit_files(pcb);
  472. process_exit_thread(pcb);
  473. // todo: 可否在这里释放内存结构体?(在判断共享页引用问题之后)
  474. pcb->state = PROC_ZOMBIE;
  475. pcb->exit_code = code;
  476. sti();
  477. process_exit_notify();
  478. sched();
  479. while (1)
  480. pause();
  481. }
  482. /**
  483. * @brief 初始化内核进程
  484. *
  485. * @param fn 目标程序的地址
  486. * @param arg 向目标程序传入的参数
  487. * @param flags
  488. * @return int
  489. */
  490. int kernel_thread(unsigned long (*fn)(unsigned long), unsigned long arg, unsigned long flags)
  491. {
  492. struct pt_regs regs;
  493. barrier();
  494. memset(&regs, 0, sizeof(regs));
  495. barrier();
  496. // 在rbx寄存器中保存进程的入口地址
  497. regs.rbx = (ul)fn;
  498. // 在rdx寄存器中保存传入的参数
  499. regs.rdx = (ul)arg;
  500. barrier();
  501. regs.ds = KERNEL_DS;
  502. barrier();
  503. regs.es = KERNEL_DS;
  504. barrier();
  505. regs.cs = KERNEL_CS;
  506. barrier();
  507. regs.ss = KERNEL_DS;
  508. barrier();
  509. // 置位中断使能标志位
  510. regs.rflags = (1 << 9);
  511. barrier();
  512. // rip寄存器指向内核线程的引导程序
  513. regs.rip = (ul)kernel_thread_func;
  514. barrier();
  515. // kdebug("kernel_thread_func=%#018lx", kernel_thread_func);
  516. // kdebug("&kernel_thread_func=%#018lx", &kernel_thread_func);
  517. // kdebug("1111\tregs.rip = %#018lx", regs.rip);
  518. return do_fork(&regs, flags | CLONE_VM, 0, 0);
  519. }
  520. /**
  521. * @brief Initialize the process module
  522. * Precondition: the system call module has already been initialized
  *
  * Fills in initial_mm from the running kernel's layout, makes sure every kernel-half
  * entry of the current top-level page table is populated, initializes the pid lock
  * and the process list, spawns initial_kernel_thread and finally marks the initial
  * process as running.
  523. */
  524. void process_init()
  525. {
  526. kinfo("Initializing process...");
  // Describe the initial process's memory layout using the current CR3 and the kernel's section boundaries.
  527. initial_mm.pgd = (pml4t_t *)get_CR3();
  528. initial_mm.code_addr_start = memory_management_struct.kernel_code_start;
  529. initial_mm.code_addr_end = memory_management_struct.kernel_code_end;
  530. initial_mm.data_addr_start = (ul)&_data;
  531. initial_mm.data_addr_end = memory_management_struct.kernel_data_end;
  532. initial_mm.rodata_addr_start = (ul)&_rodata;
  533. initial_mm.rodata_addr_end = (ul)&_erodata;
  534. initial_mm.bss_start = (uint64_t)&_bss;
  535. initial_mm.bss_end = (uint64_t)&_ebss;
  536. initial_mm.brk_start = memory_management_struct.start_brk;
  537. initial_mm.brk_end = current_pcb->addr_limit;
  538. initial_mm.stack_start = _stack_start;
  539. initial_mm.vmas = NULL;
  540. initial_tss[proc_current_cpu_id].rsp0 = initial_thread.rbp;
  541. // ========= Add mappings for the kernel address space to the top-level page table of the IDLE process =====================
  542. // The high half of the IDLE process's top-level page table is copied by later processes. So that all processes share the same kernel space,
  543. // the second-level tables must be mapped in the IDLE process's top-level page table first
  544. uint64_t *idle_pml4t_vaddr = (uint64_t *)phys_2_virt((uint64_t)get_CR3() & (~0xfffUL));
  // Entries 256..511 form the upper half of the 512-entry table.
  545. for (int i = 256; i < 512; ++i)
  546. {
  547. uint64_t *tmp = idle_pml4t_vaddr + i;
  548. barrier();
  // Only entries that are still empty receive a freshly zeroed next-level table.
  // NOTE(review): the kmalloc() result is used without a NULL check.
  549. if (*tmp == 0)
  550. {
  551. void *pdpt = kmalloc(PAGE_4K_SIZE, 0);
  552. barrier();
  553. memset(pdpt, 0, PAGE_4K_SIZE);
  554. barrier();
  555. set_pml4t(tmp, mk_pml4t(virt_2_phys(pdpt), PAGE_KERNEL_PGT));
  556. }
  557. }
  558. barrier();
  559. flush_tlb();
  560. /*
  561. kdebug("initial_thread.rbp=%#018lx", initial_thread.rbp);
  562. kdebug("initial_tss[0].rsp1=%#018lx", initial_tss[0].rsp1);
  563. kdebug("initial_tss[0].ist1=%#018lx", initial_tss[0].ist1);
  564. */
  565. // Initialize the pid write lock
  566. spin_init(&process_global_pid_write_lock);
  567. // Initialize the circular list of processes
  568. list_init(&initial_proc_union.pcb.list);
  569. barrier();
  570. kernel_thread(initial_kernel_thread, 10, CLONE_FS | CLONE_SIGNAL); // create the init kernel thread
  571. barrier();
  572. initial_proc_union.pcb.state = PROC_RUNNING;
  573. initial_proc_union.pcb.preempt_count = 0;
  574. initial_proc_union.pcb.cpu_id = 0;
  575. initial_proc_union.pcb.virtual_runtime = (1UL << 60);
  // NOTE(review): presumably current_pcb is the initial process here, which would make this store a repeat of the previous one -- confirm.
  576. current_pcb->virtual_runtime = (1UL << 60);
  577. }
  578. /**
  579. * @brief fork当前进程
  580. *
  581. * @param regs 新的寄存器值
  582. * @param clone_flags 克隆标志
  583. * @param stack_start 堆栈开始地址
  584. * @param stack_size 堆栈大小
  585. * @return unsigned long
  586. */
  587. unsigned long do_fork(struct pt_regs *regs, unsigned long clone_flags, unsigned long stack_start, unsigned long stack_size)
  588. {
  589. int retval = 0;
  590. struct process_control_block *tsk = NULL;
  591. // 为新的进程分配栈空间,并将pcb放置在底部
  592. tsk = (struct process_control_block *)kmalloc(STACK_SIZE, 0);
  593. barrier();
  594. if (tsk == NULL)
  595. {
  596. retval = -ENOMEM;
  597. return retval;
  598. }
  599. barrier();
  600. memset(tsk, 0, sizeof(struct process_control_block));
  601. io_mfence();
  602. // 将当前进程的pcb复制到新的pcb内
  603. memcpy(tsk, current_pcb, sizeof(struct process_control_block));
  604. io_mfence();
  605. // 初始化进程的循环链表结点
  606. list_init(&tsk->list);
  607. io_mfence();
  608. // 判断是否为内核态调用fork
  609. if (current_pcb->flags & PF_KTHREAD && stack_start != 0)
  610. tsk->flags |= PF_KFORK;
  611. tsk->priority = 2;
  612. tsk->preempt_count = 0;
  613. // 增加全局的pid并赋值给新进程的pid
  614. spin_lock(&process_global_pid_write_lock);
  615. tsk->pid = process_global_pid++;
  616. barrier();
  617. // 加入到进程链表中
  618. tsk->next_pcb = initial_proc_union.pcb.next_pcb;
  619. barrier();
  620. initial_proc_union.pcb.next_pcb = tsk;
  621. barrier();
  622. tsk->parent_pcb = current_pcb;
  623. barrier();
  624. spin_unlock(&process_global_pid_write_lock);
  625. tsk->cpu_id = proc_current_cpu_id;
  626. tsk->state = PROC_UNINTERRUPTIBLE;
  627. tsk->parent_pcb = current_pcb;
  628. wait_queue_init(&tsk->wait_child_proc_exit, NULL);
  629. barrier();
  630. list_init(&tsk->list);
  631. retval = -ENOMEM;
  632. // 拷贝标志位
  633. if (process_copy_flags(clone_flags, tsk))
  634. goto copy_flags_failed;
  635. // 拷贝内存空间分布结构体
  636. if (process_copy_mm(clone_flags, tsk))
  637. goto copy_mm_failed;
  638. // 拷贝文件
  639. if (process_copy_files(clone_flags, tsk))
  640. goto copy_files_failed;
  641. // 拷贝线程结构体
  642. if (process_copy_thread(clone_flags, tsk, stack_start, stack_size, regs))
  643. goto copy_thread_failed;
  644. // 拷贝成功
  645. retval = tsk->pid;
  646. tsk->flags &= ~PF_KFORK;
  647. // 唤醒进程
  648. process_wakeup(tsk);
  649. return retval;
  650. copy_thread_failed:;
  651. // 回收线程
  652. process_exit_thread(tsk);
  653. copy_files_failed:;
  654. // 回收文件
  655. process_exit_files(tsk);
  656. copy_mm_failed:;
  657. // 回收内存空间分布结构体
  658. process_exit_mm(tsk);
  659. copy_flags_failed:;
  660. kfree(tsk);
  661. return retval;
  662. return 0;
  663. }
  664. /**
  665. * @brief 根据pid获取进程的pcb
  666. *
  667. * @param pid
  668. * @return struct process_control_block*
  669. */
  670. struct process_control_block *process_get_pcb(long pid)
  671. {
  672. struct process_control_block *pcb = initial_proc_union.pcb.next_pcb;
  673. // 使用蛮力法搜索指定pid的pcb
  674. // todo: 使用哈希表来管理pcb
  675. for (; pcb != &initial_proc_union.pcb; pcb = pcb->next_pcb)
  676. {
  677. if (pcb->pid == pid)
  678. return pcb;
  679. }
  680. return NULL;
  681. }
  682. /**
  683. * @brief 将进程加入到调度器的就绪队列中
  684. *
  685. * @param pcb 进程的pcb
  686. */
  687. void process_wakeup(struct process_control_block *pcb)
  688. {
  689. pcb->state = PROC_RUNNING;
  690. sched_enqueue(pcb);
  691. }
  692. /**
  693. * @brief 将进程加入到调度器的就绪队列中,并标志当前进程需要被调度
  694. *
  695. * @param pcb 进程的pcb
  696. */
  697. void process_wakeup_immediately(struct process_control_block *pcb)
  698. {
  699. pcb->state = PROC_RUNNING;
  700. sched_enqueue(pcb);
  701. // 将当前进程标志为需要调度,缩短新进程被wakeup的时间
  702. current_pcb->flags |= PF_NEED_SCHED;
  703. }
  704. /**
  705. * @brief 拷贝当前进程的标志位
  706. *
  707. * @param clone_flags 克隆标志位
  708. * @param pcb 新的进程的pcb
  709. * @return uint64_t
  710. */
  711. uint64_t process_copy_flags(uint64_t clone_flags, struct process_control_block *pcb)
  712. {
  713. if (clone_flags & CLONE_VM)
  714. pcb->flags |= PF_VFORK;
  715. return 0;
  716. }
  717. /**
  718. * @brief 拷贝当前进程的文件描述符等信息
  719. *
  720. * @param clone_flags 克隆标志位
  721. * @param pcb 新的进程的pcb
  722. * @return uint64_t
  723. */
  724. uint64_t process_copy_files(uint64_t clone_flags, struct process_control_block *pcb)
  725. {
  726. int retval = 0;
  727. // 如果CLONE_FS被置位,那么子进程与父进程共享文件描述符
  728. // 文件描述符已经在复制pcb时被拷贝
  729. if (clone_flags & CLONE_FS)
  730. return retval;
  731. // 为新进程拷贝新的文件描述符
  732. for (int i = 0; i < PROC_MAX_FD_NUM; ++i)
  733. {
  734. if (current_pcb->fds[i] == NULL)
  735. continue;
  736. pcb->fds[i] = (struct vfs_file_t *)kmalloc(sizeof(struct vfs_file_t), 0);
  737. memcpy(pcb->fds[i], current_pcb->fds[i], sizeof(struct vfs_file_t));
  738. }
  739. return retval;
  740. }
  741. /**
  742. * @brief 回收进程的所有文件描述符
  743. *
  744. * @param pcb 要被回收的进程的pcb
  745. * @return uint64_t
  746. */
  747. uint64_t process_exit_files(struct process_control_block *pcb)
  748. {
  749. // 不与父进程共享文件描述符
  750. if (!(pcb->flags & PF_VFORK))
  751. {
  752. for (int i = 0; i < PROC_MAX_FD_NUM; ++i)
  753. {
  754. if (pcb->fds[i] == NULL)
  755. continue;
  756. kfree(pcb->fds[i]);
  757. }
  758. }
  759. // 清空当前进程的文件描述符列表
  760. memset(pcb->fds, 0, sizeof(struct vfs_file_t *) * PROC_MAX_FD_NUM);
  761. }
  762. /**
  763. * @brief 拷贝当前进程的内存空间分布结构体信息
  764. *
  765. * @param clone_flags 克隆标志位
  766. * @param pcb 新的进程的pcb
  767. * @return uint64_t
  768. */
  769. uint64_t process_copy_mm(uint64_t clone_flags, struct process_control_block *pcb)
  770. {
  771. int retval = 0;
  772. // 与父进程共享内存空间
  773. if (clone_flags & CLONE_VM)
  774. {
  775. pcb->mm = current_pcb->mm;
  776. return retval;
  777. }
  778. // 分配新的内存空间分布结构体
  779. struct mm_struct *new_mms = (struct mm_struct *)kmalloc(sizeof(struct mm_struct), 0);
  780. memset(new_mms, 0, sizeof(struct mm_struct));
  781. memcpy(new_mms, current_pcb->mm, sizeof(struct mm_struct));
  782. new_mms->vmas = NULL;
  783. pcb->mm = new_mms;
  784. // 分配顶层页表, 并设置顶层页表的物理地址
  785. new_mms->pgd = (pml4t_t *)virt_2_phys(kmalloc(PAGE_4K_SIZE, 0));
  786. // 由于高2K部分为内核空间,在接下来需要覆盖其数据,因此不用清零
  787. memset(phys_2_virt(new_mms->pgd), 0, PAGE_4K_SIZE / 2);
  788. // 拷贝内核空间的页表指针
  789. memcpy(phys_2_virt(new_mms->pgd) + 256, phys_2_virt(initial_proc[proc_current_cpu_id]->mm->pgd) + 256, PAGE_4K_SIZE / 2);
  790. uint64_t *current_pgd = (uint64_t *)phys_2_virt(current_pcb->mm->pgd);
  791. uint64_t *new_pml4t = (uint64_t *)phys_2_virt(new_mms->pgd);
  792. // 拷贝用户空间的vma
  793. struct vm_area_struct *vma = current_pcb->mm->vmas;
  794. while (vma != NULL)
  795. {
  796. if (vma->vm_end > USER_MAX_LINEAR_ADDR || vma->vm_flags & VM_DONTCOPY)
  797. {
  798. vma = vma->vm_next;
  799. continue;
  800. }
  801. int64_t vma_size = vma->vm_end - vma->vm_start;
  802. // kdebug("vma_size=%ld, vm_start=%#018lx", vma_size, vma->vm_start);
  803. if (vma_size > PAGE_2M_SIZE / 2)
  804. {
  805. int page_to_alloc = (PAGE_2M_ALIGN(vma_size)) >> PAGE_2M_SHIFT;
  806. for (int i = 0; i < page_to_alloc; ++i)
  807. {
  808. uint64_t pa = alloc_pages(ZONE_NORMAL, 1, PAGE_PGT_MAPPED)->addr_phys;
  809. struct vm_area_struct *new_vma = NULL;
  810. int ret = mm_create_vma(new_mms, vma->vm_start + i * PAGE_2M_SIZE, PAGE_2M_SIZE, vma->vm_flags, vma->vm_ops, &new_vma);
  811. // 防止内存泄露
  812. if (unlikely(ret == -EEXIST))
  813. free_pages(Phy_to_2M_Page(pa), 1);
  814. else
  815. mm_map_vma(new_vma, pa);
  816. memcpy((void *)phys_2_virt(pa), (void *)(vma->vm_start + i * PAGE_2M_SIZE), (vma_size >= PAGE_2M_SIZE) ? PAGE_2M_SIZE : vma_size);
  817. vma_size -= PAGE_2M_SIZE;
  818. }
  819. }
  820. else
  821. {
  822. uint64_t map_size = PAGE_4K_ALIGN(vma_size);
  823. uint64_t va = (uint64_t)kmalloc(map_size, 0);
  824. struct vm_area_struct *new_vma = NULL;
  825. int ret = mm_create_vma(new_mms, vma->vm_start, map_size, vma->vm_flags, vma->vm_ops, &new_vma);
  826. // 防止内存泄露
  827. if (unlikely(ret == -EEXIST))
  828. kfree((void *)va);
  829. else
  830. mm_map_vma(new_vma, virt_2_phys(va));
  831. memcpy((void *)va, (void *)vma->vm_start, vma_size);
  832. }
  833. vma = vma->vm_next;
  834. }
  835. return retval;
  836. }
  837. /**
  838. * @brief 释放进程的页表
  839. *
  840. * @param pcb 要被释放页表的进程
  841. * @return uint64_t
  842. */
  843. uint64_t process_exit_mm(struct process_control_block *pcb)
  844. {
  845. if (pcb->flags & CLONE_VM)
  846. return 0;
  847. if (pcb->mm == NULL)
  848. {
  849. kdebug("pcb->mm==NULL");
  850. return 0;
  851. }
  852. if (pcb->mm->pgd == NULL)
  853. {
  854. kdebug("pcb->mm->pgd==NULL");
  855. return 0;
  856. }
  857. // // 获取顶层页表
  858. pml4t_t *current_pgd = (pml4t_t *)phys_2_virt(pcb->mm->pgd);
  859. // 循环释放VMA中的内存
  860. struct vm_area_struct *vma = pcb->mm->vmas;
  861. while (vma != NULL)
  862. {
  863. struct vm_area_struct *cur_vma = vma;
  864. vma = cur_vma->vm_next;
  865. uint64_t pa;
  866. // kdebug("vm start=%#018lx, sem=%d", cur_vma->vm_start, cur_vma->anon_vma->sem.counter);
  867. mm_unmap_vma(pcb->mm, cur_vma, &pa);
  868. uint64_t size = (cur_vma->vm_end - cur_vma->vm_start);
  869. // 释放内存
  870. switch (size)
  871. {
  872. case PAGE_4K_SIZE:
  873. kfree(phys_2_virt(pa));
  874. break;
  875. default:
  876. break;
  877. }
  878. vm_area_del(cur_vma);
  879. vm_area_free(cur_vma);
  880. }
  881. // 释放顶层页表
  882. kfree(current_pgd);
  883. if (unlikely(pcb->mm->vmas != NULL))
  884. {
  885. kwarn("pcb.mm.vmas!=NULL");
  886. }
  887. // 释放内存空间分布结构体
  888. kfree(pcb->mm);
  889. return 0;
  890. }
  891. /**
  892. * @brief 重写内核栈中的rbp地址
  893. *
  894. * @param new_regs 子进程的reg
  895. * @param new_pcb 子进程的pcb
  896. * @return int
  897. */
  898. static int process_rewrite_rbp(struct pt_regs *new_regs, struct process_control_block *new_pcb)
  899. {
  900. uint64_t new_top = ((uint64_t)new_pcb) + STACK_SIZE;
  901. uint64_t old_top = (uint64_t)(current_pcb) + STACK_SIZE;
  902. uint64_t *rbp = &new_regs->rbp;
  903. uint64_t *tmp = rbp;
  904. // 超出内核栈范围
  905. if ((uint64_t)*rbp >= old_top || (uint64_t)*rbp < (old_top - STACK_SIZE))
  906. return 0;
  907. while (1)
  908. {
  909. // 计算delta
  910. uint64_t delta = old_top - *rbp;
  911. // 计算新的rbp值
  912. uint64_t newVal = new_top - delta;
  913. // 新的值不合法
  914. if (unlikely((uint64_t)newVal >= new_top || (uint64_t)newVal < (new_top - STACK_SIZE)))
  915. break;
  916. // 将新的值写入对应位置
  917. *rbp = newVal;
  918. // 跳转栈帧
  919. rbp = (uint64_t *)*rbp;
  920. }
  921. // 设置内核态fork返回到enter_syscall_int()函数内的时候,rsp寄存器的值
  922. new_regs->rsp = new_top - (old_top - new_regs->rsp);
  923. return 0;
  924. }
  925. /**
  926. * @brief 拷贝当前进程的线程结构体
  927. *
  928. * @param clone_flags 克隆标志位
  929. * @param pcb 新的进程的pcb
  930. * @return uint64_t
  931. */
  932. uint64_t process_copy_thread(uint64_t clone_flags, struct process_control_block *pcb, uint64_t stack_start, uint64_t stack_size, struct pt_regs *current_regs)
  933. {
  934. // 将线程结构体放置在pcb后方
  935. struct thread_struct *thd = (struct thread_struct *)(pcb + 1);
  936. memset(thd, 0, sizeof(struct thread_struct));
  937. pcb->thread = thd;
  938. struct pt_regs *child_regs = NULL;
  939. // 拷贝栈空间
  940. if (pcb->flags & PF_KFORK) // 内核态下的fork
  941. {
  942. // 内核态下则拷贝整个内核栈
  943. uint32_t size = ((uint64_t)current_pcb) + STACK_SIZE - (uint64_t)(current_regs);
  944. child_regs = (struct pt_regs *)(((uint64_t)pcb) + STACK_SIZE - size);
  945. memcpy(child_regs, (void *)current_regs, size);
  946. barrier();
  947. // 然后重写新的栈中,每个栈帧的rbp值
  948. process_rewrite_rbp(child_regs, pcb);
  949. }
  950. else
  951. {
  952. child_regs = (struct pt_regs *)((uint64_t)pcb + STACK_SIZE - sizeof(struct pt_regs));
  953. memcpy(child_regs, current_regs, sizeof(struct pt_regs));
  954. barrier();
  955. child_regs->rsp = stack_start;
  956. }
  957. // 设置子进程的返回值为0
  958. child_regs->rax = 0;
  959. if (pcb->flags & PF_KFORK)
  960. thd->rbp = (uint64_t)(child_regs + 1); // 设置新的内核线程开始执行时的rbp(也就是进入ret_from_system_call时的rbp)
  961. else
  962. thd->rbp = (uint64_t)pcb + STACK_SIZE;
  963. // 设置新的内核线程开始执行的时候的rsp
  964. thd->rsp = (uint64_t)child_regs;
  965. thd->fs = current_pcb->thread->fs;
  966. thd->gs = current_pcb->thread->gs;
  967. // 根据是否为内核线程、是否在内核态fork,设置进程的开始执行的地址
  968. if (pcb->flags & PF_KFORK)
  969. thd->rip = (uint64_t)ret_from_system_call;
  970. else if (pcb->flags & PF_KTHREAD && (!(pcb->flags & PF_KFORK)))
  971. thd->rip = (uint64_t)kernel_thread_func;
  972. else
  973. thd->rip = (uint64_t)ret_from_system_call;
  974. return 0;
  975. }
  /**
   * @brief TODO: reclaim the thread structure of a process.
   *
   * Currently a stub with no effect.
   *
   * @param pcb process whose thread structure should be reclaimed
   */
  void process_exit_thread(struct process_control_block *pcb)
  {
      // Nothing to release yet.
      (void)pcb;
  }
  984. /**
  985. * @brief 申请可用的文件句柄
  986. *
  987. * @return int
  988. */
  989. int process_fd_alloc(struct vfs_file_t *file)
  990. {
  991. int fd_num = -1;
  992. struct vfs_file_t **f = current_pcb->fds;
  993. for (int i = 0; i < PROC_MAX_FD_NUM; ++i)
  994. {
  995. /* 找到指针数组中的空位 */
  996. if (f[i] == NULL)
  997. {
  998. fd_num = i;
  999. f[i] = file;
  1000. break;
  1001. }
  1002. }
  1003. return fd_num;
  1004. }