// process.c
  1. #include "process.h"
  2. #include <common/printk.h>
  3. #include <common/kprint.h>
  4. #include <common/stdio.h>
  5. #include <common/compiler.h>
  6. #include <common/libELF/elf.h>
  7. #include <common/time.h>
  8. #include <common/sys/wait.h>
  9. #include <driver/video/video.h>
  10. #include <driver/usb/usb.h>
  11. #include <exception/gate.h>
  12. #include <filesystem/fat32/fat32.h>
  13. #include <mm/slab.h>
  14. #include <common/spinlock.h>
  15. #include <syscall/syscall.h>
  16. #include <syscall/syscall_num.h>
  17. #include <sched/sched.h>
  18. #include <common/unistd.h>
  19. #include <debug/traceback/traceback.h>
  20. #include <ktest/ktest.h>
  21. // #pragma GCC push_options
  22. // #pragma GCC optimize("O0")
  23. spinlock_t process_global_pid_write_lock; // 增加pid的写锁
  24. long process_global_pid = 1; // 系统中最大的pid
  25. extern void system_call(void);
  26. extern void kernel_thread_func(void);
  27. ul _stack_start; // initial proc的栈基地址(虚拟地址)
  28. struct mm_struct initial_mm = {0};
  29. struct thread_struct initial_thread =
  30. {
  31. .rbp = (ul)(initial_proc_union.stack + STACK_SIZE / sizeof(ul)),
  32. .rsp = (ul)(initial_proc_union.stack + STACK_SIZE / sizeof(ul)),
  33. .fs = KERNEL_DS,
  34. .gs = KERNEL_DS,
  35. .cr2 = 0,
  36. .trap_num = 0,
  37. .err_code = 0};
  38. // 初始化 初始进程的union ,并将其链接到.data.init_proc段内
  39. union proc_union initial_proc_union __attribute__((__section__(".data.init_proc_union"))) = {INITIAL_PROC(initial_proc_union.pcb)};
  40. struct process_control_block *initial_proc[MAX_CPU_NUM] = {&initial_proc_union.pcb, 0};
  41. // 为每个核心初始化初始进程的tss
  42. struct tss_struct initial_tss[MAX_CPU_NUM] = {[0 ... MAX_CPU_NUM - 1] = INITIAL_TSS};
  43. /**
  44. * @brief 拷贝当前进程的标志位
  45. *
  46. * @param clone_flags 克隆标志位
  47. * @param pcb 新的进程的pcb
  48. * @return uint64_t
  49. */
  50. uint64_t process_copy_flags(uint64_t clone_flags, struct process_control_block *pcb);
  51. /**
  52. * @brief 拷贝当前进程的文件描述符等信息
  53. *
  54. * @param clone_flags 克隆标志位
  55. * @param pcb 新的进程的pcb
  56. * @return uint64_t
  57. */
  58. uint64_t process_copy_files(uint64_t clone_flags, struct process_control_block *pcb);
  59. /**
  60. * @brief 回收进程的所有文件描述符
  61. *
  62. * @param pcb 要被回收的进程的pcb
  63. * @return uint64_t
  64. */
  65. uint64_t process_exit_files(struct process_control_block *pcb);
  66. /**
  67. * @brief 拷贝当前进程的内存空间分布结构体信息
  68. *
  69. * @param clone_flags 克隆标志位
  70. * @param pcb 新的进程的pcb
  71. * @return uint64_t
  72. */
  73. uint64_t process_copy_mm(uint64_t clone_flags, struct process_control_block *pcb);
  74. /**
  75. * @brief 释放进程的页表
  76. *
  77. * @param pcb 要被释放页表的进程
  78. * @return uint64_t
  79. */
  80. uint64_t process_exit_mm(struct process_control_block *pcb);
  81. /**
  82. * @brief 拷贝当前进程的线程结构体
  83. *
  84. * @param clone_flags 克隆标志位
  85. * @param pcb 新的进程的pcb
  86. * @return uint64_t
  87. */
  88. uint64_t process_copy_thread(uint64_t clone_flags, struct process_control_block *pcb, uint64_t stack_start, uint64_t stack_size, struct pt_regs *current_regs);
  89. void process_exit_thread(struct process_control_block *pcb);
  90. /**
  91. * @brief 切换进程
  92. *
  93. * @param prev 上一个进程的pcb
  94. * @param next 将要切换到的进程的pcb
  95. * 由于程序在进入内核的时候已经保存了寄存器,因此这里不需要保存寄存器。
  96. * 这里切换fs和gs寄存器
  97. */
  98. #pragma GCC push_options
  99. #pragma GCC optimize("O0")
  100. void __switch_to(struct process_control_block *prev, struct process_control_block *next)
  101. {
  102. initial_tss[proc_current_cpu_id].rsp0 = next->thread->rbp;
  103. // kdebug("next_rsp = %#018lx ", next->thread->rsp);
  104. // set_tss64((uint *)phys_2_virt(TSS64_Table), initial_tss[0].rsp0, initial_tss[0].rsp1, initial_tss[0].rsp2, initial_tss[0].ist1,
  105. // initial_tss[0].ist2, initial_tss[0].ist3, initial_tss[0].ist4, initial_tss[0].ist5, initial_tss[0].ist6, initial_tss[0].ist7);
  106. __asm__ __volatile__("movq %%fs, %0 \n\t"
  107. : "=a"(prev->thread->fs));
  108. __asm__ __volatile__("movq %%gs, %0 \n\t"
  109. : "=a"(prev->thread->gs));
  110. __asm__ __volatile__("movq %0, %%fs \n\t" ::"a"(next->thread->fs));
  111. __asm__ __volatile__("movq %0, %%gs \n\t" ::"a"(next->thread->gs));
  112. }
  113. #pragma GCC pop_options
  114. /**
  115. * @brief 打开要执行的程序文件
  116. *
  117. * @param path
  118. * @return struct vfs_file_t*
  119. */
  120. struct vfs_file_t *process_open_exec_file(char *path)
  121. {
  122. struct vfs_dir_entry_t *dentry = NULL;
  123. struct vfs_file_t *filp = NULL;
  124. dentry = vfs_path_walk(path, 0);
  125. if (dentry == NULL)
  126. return (void *)-ENOENT;
  127. if (dentry->dir_inode->attribute == VFS_ATTR_DIR)
  128. return (void *)-ENOTDIR;
  129. filp = (struct vfs_file_t *)kmalloc(sizeof(struct vfs_file_t), 0);
  130. if (filp == NULL)
  131. return (void *)-ENOMEM;
  132. filp->position = 0;
  133. filp->mode = 0;
  134. filp->dEntry = dentry;
  135. filp->mode = ATTR_READ_ONLY;
  136. filp->file_ops = dentry->dir_inode->file_ops;
  137. return filp;
  138. }
  139. /**
  140. * @brief 加载elf格式的程序文件到内存中,并设置regs
  141. *
  142. * @param regs 寄存器
  143. * @param path 文件路径
  144. * @return int
  145. */
  146. static int process_load_elf_file(struct pt_regs *regs, char *path)
  147. {
  148. int retval = 0;
  149. struct vfs_file_t *filp = process_open_exec_file(path);
  150. if ((long)filp <= 0 && (long)filp >= -255)
  151. {
  152. // kdebug("(long)filp=%ld", (long)filp);
  153. return (unsigned long)filp;
  154. }
  155. void *buf = kmalloc(PAGE_4K_SIZE, 0);
  156. memset(buf, 0, PAGE_4K_SIZE);
  157. uint64_t pos = 0;
  158. pos = filp->file_ops->lseek(filp, 0, SEEK_SET);
  159. retval = filp->file_ops->read(filp, (char *)buf, sizeof(Elf64_Ehdr), &pos);
  160. retval = 0;
  161. if (!elf_check(buf))
  162. {
  163. kerror("Not an ELF file: %s", path);
  164. retval = -ENOTSUP;
  165. goto load_elf_failed;
  166. }
  167. #if ARCH(X86_64)
  168. // 暂时只支持64位的文件
  169. if (((Elf32_Ehdr *)buf)->e_ident[EI_CLASS] != ELFCLASS64)
  170. {
  171. kdebug("((Elf32_Ehdr *)buf)->e_ident[EI_CLASS]=%d", ((Elf32_Ehdr *)buf)->e_ident[EI_CLASS]);
  172. retval = -EUNSUPPORTED;
  173. goto load_elf_failed;
  174. }
  175. Elf64_Ehdr ehdr = *(Elf64_Ehdr *)buf;
  176. // 暂时只支持AMD64架构
  177. if (ehdr.e_machine != EM_AMD64)
  178. {
  179. kerror("e_machine=%d", ehdr.e_machine);
  180. retval = -EUNSUPPORTED;
  181. goto load_elf_failed;
  182. }
  183. #else
  184. #error Unsupported architecture!
  185. #endif
  186. if (ehdr.e_type != ET_EXEC)
  187. {
  188. kerror("Not executable file! filename=%s\tehdr->e_type=%d", path, ehdr.e_type);
  189. retval = -EUNSUPPORTED;
  190. goto load_elf_failed;
  191. }
  192. // kdebug("filename=%s:\te_entry=%#018lx", path, ehdr.e_entry);
  193. regs->rip = ehdr.e_entry;
  194. current_pcb->mm->code_addr_start = ehdr.e_entry;
  195. // kdebug("ehdr.e_phoff=%#018lx\t ehdr.e_phentsize=%d, ehdr.e_phnum=%d", ehdr.e_phoff, ehdr.e_phentsize, ehdr.e_phnum);
  196. // 将指针移动到program header处
  197. pos = ehdr.e_phoff;
  198. // 读取所有的phdr
  199. pos = filp->file_ops->lseek(filp, pos, SEEK_SET);
  200. filp->file_ops->read(filp, (char *)buf, (uint64_t)ehdr.e_phentsize * (uint64_t)ehdr.e_phnum, &pos);
  201. if ((unsigned long)filp <= 0)
  202. {
  203. kdebug("(unsigned long)filp=%d", (long)filp);
  204. retval = -ENOEXEC;
  205. goto load_elf_failed;
  206. }
  207. Elf64_Phdr *phdr = buf;
  208. // 将程序加载到内存中
  209. for (int i = 0; i < ehdr.e_phnum; ++i, ++phdr)
  210. {
  211. // kdebug("phdr[%d] phdr->p_offset=%#018lx phdr->p_vaddr=%#018lx phdr->p_memsz=%ld phdr->p_filesz=%ld phdr->p_type=%d", i, phdr->p_offset, phdr->p_vaddr, phdr->p_memsz, phdr->p_filesz, phdr->p_type);
  212. // 不是可加载的段
  213. if (phdr->p_type != PT_LOAD)
  214. continue;
  215. int64_t remain_mem_size = phdr->p_memsz;
  216. int64_t remain_file_size = phdr->p_filesz;
  217. pos = phdr->p_offset;
  218. uint64_t virt_base = phdr->p_vaddr;
  219. // kdebug("virt_base = %#018lx, &memory_management_struct=%#018lx", virt_base, &memory_management_struct);
  220. while (remain_mem_size > 0)
  221. {
  222. // todo: 改用slab分配4K大小内存块并映射到4K页
  223. if (!mm_check_mapped((uint64_t)current_pcb->mm->pgd, virt_base)) // 未映射,则新增物理页
  224. {
  225. mm_map_proc_page_table((uint64_t)current_pcb->mm->pgd, true, virt_base, alloc_pages(ZONE_NORMAL, 1, PAGE_PGT_MAPPED)->addr_phys, PAGE_2M_SIZE, PAGE_USER_PAGE, true, true, false);
  226. memset((void *)virt_base, 0, PAGE_2M_SIZE);
  227. }
  228. pos = filp->file_ops->lseek(filp, pos, SEEK_SET);
  229. int64_t val = 0;
  230. if (remain_file_size != 0)
  231. {
  232. int64_t to_trans = (remain_file_size > PAGE_2M_SIZE) ? PAGE_2M_SIZE : remain_file_size;
  233. val = filp->file_ops->read(filp, (char *)virt_base, to_trans, &pos);
  234. }
  235. if (val < 0)
  236. goto load_elf_failed;
  237. remain_mem_size -= PAGE_2M_SIZE;
  238. remain_file_size -= val;
  239. virt_base += PAGE_2M_SIZE;
  240. }
  241. }
  242. // 分配2MB的栈内存空间
  243. regs->rsp = current_pcb->mm->stack_start;
  244. regs->rbp = current_pcb->mm->stack_start;
  245. uint64_t pa = alloc_pages(ZONE_NORMAL, 1, PAGE_PGT_MAPPED)->addr_phys;
  246. mm_map_proc_page_table((uint64_t)current_pcb->mm->pgd, true, current_pcb->mm->stack_start - PAGE_2M_SIZE, pa, PAGE_2M_SIZE, PAGE_USER_PAGE, true, true, false);
  247. // 清空栈空间
  248. memset((void *)(current_pcb->mm->stack_start - PAGE_2M_SIZE), 0, PAGE_2M_SIZE);
  249. load_elf_failed:;
  250. if (buf != NULL)
  251. kfree(buf);
  252. return retval;
  253. }
  254. /**
  255. * @brief 使当前进程去执行新的代码
  256. *
  257. * @param regs 当前进程的寄存器
  258. * @param path 可执行程序的路径
  259. * @param argv 参数列表
  260. * @param envp 环境变量
  261. * @return ul 错误码
  262. */
  263. #pragma GCC push_options
  264. #pragma GCC optimize("O0")
  265. ul do_execve(struct pt_regs *regs, char *path, char *argv[], char *envp[])
  266. {
  267. // kdebug("do_execve is running...");
  268. // 当前进程正在与父进程共享地址空间,需要创建
  269. // 独立的地址空间才能使新程序正常运行
  270. if (current_pcb->flags & PF_VFORK)
  271. {
  272. kdebug("proc:%d creating new mem space", current_pcb->pid);
  273. // 分配新的内存空间分布结构体
  274. struct mm_struct *new_mms = (struct mm_struct *)kmalloc(sizeof(struct mm_struct), 0);
  275. memset(new_mms, 0, sizeof(struct mm_struct));
  276. current_pcb->mm = new_mms;
  277. // 分配顶层页表, 并设置顶层页表的物理地址
  278. new_mms->pgd = (pml4t_t *)virt_2_phys(kmalloc(PAGE_4K_SIZE, 0));
  279. // 由于高2K部分为内核空间,在接下来需要覆盖其数据,因此不用清零
  280. memset(phys_2_virt(new_mms->pgd), 0, PAGE_4K_SIZE / 2);
  281. // 拷贝内核空间的页表指针
  282. memcpy(phys_2_virt(new_mms->pgd) + 256, phys_2_virt(initial_proc[proc_current_cpu_id]) + 256, PAGE_4K_SIZE / 2);
  283. }
  284. // 设置用户栈和用户堆的基地址
  285. unsigned long stack_start_addr = 0x6ffff0a00000UL;
  286. const uint64_t brk_start_addr = 0x700000000000UL;
  287. process_switch_mm(current_pcb);
  288. // 为用户态程序设置地址边界
  289. if (!(current_pcb->flags & PF_KTHREAD))
  290. current_pcb->addr_limit = USER_MAX_LINEAR_ADDR;
  291. current_pcb->mm->code_addr_end = 0;
  292. current_pcb->mm->data_addr_start = 0;
  293. current_pcb->mm->data_addr_end = 0;
  294. current_pcb->mm->rodata_addr_start = 0;
  295. current_pcb->mm->rodata_addr_end = 0;
  296. current_pcb->mm->bss_start = 0;
  297. current_pcb->mm->bss_end = 0;
  298. current_pcb->mm->brk_start = brk_start_addr;
  299. current_pcb->mm->brk_end = brk_start_addr;
  300. current_pcb->mm->stack_start = stack_start_addr;
  301. // 关闭之前的文件描述符
  302. process_exit_files(current_pcb);
  303. // 清除进程的vfork标志位
  304. current_pcb->flags &= ~PF_VFORK;
  305. // 加载elf格式的可执行文件
  306. int tmp = process_load_elf_file(regs, path);
  307. if (tmp < 0)
  308. goto exec_failed;
  309. // 拷贝参数列表
  310. if (argv != NULL)
  311. {
  312. int argc = 0;
  313. // 目标程序的argv基地址指针,最大8个参数
  314. char **dst_argv = (char **)(stack_start_addr - (sizeof(char **) << 3));
  315. uint64_t str_addr = (uint64_t)dst_argv;
  316. for (argc = 0; argc < 8 && argv[argc] != NULL; ++argc)
  317. {
  318. if (*argv[argc] == NULL)
  319. break;
  320. // 测量参数的长度(最大1023)
  321. int argv_len = strnlen_user(argv[argc], 1023) + 1;
  322. strncpy((char *)(str_addr - argv_len), argv[argc], argv_len - 1);
  323. str_addr -= argv_len;
  324. dst_argv[argc] = (char *)str_addr;
  325. // 字符串加上结尾字符
  326. ((char *)str_addr)[argv_len] = '\0';
  327. }
  328. // 重新设定栈基址,并预留空间防止越界
  329. stack_start_addr = str_addr - 8;
  330. current_pcb->mm->stack_start = stack_start_addr;
  331. regs->rsp = regs->rbp = stack_start_addr;
  332. // 传递参数
  333. regs->rdi = argc;
  334. regs->rsi = (uint64_t)dst_argv;
  335. }
  336. // kdebug("execve ok");
  337. regs->cs = USER_CS | 3;
  338. regs->ds = USER_DS | 3;
  339. regs->ss = USER_DS | 0x3;
  340. regs->rflags = 0x200246;
  341. regs->rax = 1;
  342. regs->es = 0;
  343. return 0;
  344. exec_failed:;
  345. process_do_exit(tmp);
  346. }
  347. #pragma GCC pop_options
  348. /**
  349. * @brief 内核init进程
  350. *
  351. * @param arg
  352. * @return ul 参数
  353. */
  354. #pragma GCC push_options
  355. #pragma GCC optimize("O0")
  356. ul initial_kernel_thread(ul arg)
  357. {
  358. // kinfo("initial proc running...\targ:%#018lx", arg);
  359. fat32_init();
  360. usb_init();
  361. // 对一些组件进行单元测试
  362. uint64_t tpid[] = {
  363. ktest_start(ktest_test_bitree, 0),
  364. ktest_start(ktest_test_kfifo, 0),
  365. ktest_start(ktest_test_mutex, 0),
  366. };
  367. kinfo("Waiting test thread exit...");
  368. // 等待测试进程退出
  369. for (int i = 0; i < sizeof(tpid) / sizeof(uint64_t); ++i)
  370. waitpid(tpid[i], NULL, NULL);
  371. kinfo("All test done.");
  372. // pid_t p = fork();
  373. // if (p == 0)
  374. // {
  375. // kdebug("in subproc, rflags=%#018lx", get_rflags());
  376. // while (1)
  377. // usleep(1000);
  378. // }
  379. // kdebug("subprocess pid=%d", p);
  380. // 准备切换到用户态
  381. struct pt_regs *regs;
  382. // 若在后面这段代码中触发中断,return时会导致段选择子错误,从而触发#GP,因此这里需要cli
  383. cli();
  384. current_pcb->thread->rip = (ul)ret_from_system_call;
  385. current_pcb->thread->rsp = (ul)current_pcb + STACK_SIZE - sizeof(struct pt_regs);
  386. current_pcb->thread->fs = USER_DS | 0x3;
  387. barrier();
  388. current_pcb->thread->gs = USER_DS | 0x3;
  389. // 主动放弃内核线程身份
  390. current_pcb->flags &= (~PF_KTHREAD);
  391. kdebug("in initial_kernel_thread: flags=%ld", current_pcb->flags);
  392. regs = (struct pt_regs *)current_pcb->thread->rsp;
  393. // kdebug("current_pcb->thread->rsp=%#018lx", current_pcb->thread->rsp);
  394. current_pcb->flags = 0;
  395. // 将返回用户层的代码压入堆栈,向rdx传入regs的地址,然后jmp到do_execve这个系统调用api的处理函数 这里的设计思路和switch_proc类似
  396. // 加载用户态程序:shell.elf
  397. char init_path[] = "/shell.elf";
  398. uint64_t addr = (uint64_t)&init_path;
  399. __asm__ __volatile__("movq %1, %%rsp \n\t"
  400. "pushq %2 \n\t"
  401. "jmp do_execve \n\t" ::"D"(current_pcb->thread->rsp),
  402. "m"(current_pcb->thread->rsp), "m"(current_pcb->thread->rip), "S"("/shell.elf"), "c"(NULL), "d"(NULL)
  403. : "memory");
  404. return 1;
  405. }
  406. #pragma GCC pop_options
  407. /**
  408. * @brief 当子进程退出后向父进程发送通知
  409. *
  410. */
  411. void process_exit_notify()
  412. {
  413. wait_queue_wakeup(&current_pcb->parent_pcb->wait_child_proc_exit, PROC_INTERRUPTIBLE);
  414. }
  415. /**
  416. * @brief 进程退出时执行的函数
  417. *
  418. * @param code 返回码
  419. * @return ul
  420. */
  421. ul process_do_exit(ul code)
  422. {
  423. // kinfo("process exiting..., code is %ld.", (long)code);
  424. cli();
  425. struct process_control_block *pcb = current_pcb;
  426. // 进程退出时释放资源
  427. process_exit_files(pcb);
  428. process_exit_thread(pcb);
  429. // todo: 可否在这里释放内存结构体?(在判断共享页引用问题之后)
  430. pcb->state = PROC_ZOMBIE;
  431. pcb->exit_code = code;
  432. sti();
  433. process_exit_notify();
  434. sched_cfs();
  435. while (1)
  436. pause();
  437. }
  438. /**
  439. * @brief 初始化内核进程
  440. *
  441. * @param fn 目标程序的地址
  442. * @param arg 向目标程序传入的参数
  443. * @param flags
  444. * @return int
  445. */
  446. int kernel_thread(unsigned long (*fn)(unsigned long), unsigned long arg, unsigned long flags)
  447. {
  448. struct pt_regs regs;
  449. barrier();
  450. memset(&regs, 0, sizeof(regs));
  451. barrier();
  452. // 在rbx寄存器中保存进程的入口地址
  453. regs.rbx = (ul)fn;
  454. // 在rdx寄存器中保存传入的参数
  455. regs.rdx = (ul)arg;
  456. barrier();
  457. regs.ds = KERNEL_DS;
  458. barrier();
  459. regs.es = KERNEL_DS;
  460. barrier();
  461. regs.cs = KERNEL_CS;
  462. barrier();
  463. regs.ss = KERNEL_DS;
  464. barrier();
  465. // 置位中断使能标志位
  466. regs.rflags = (1 << 9);
  467. barrier();
  468. // rip寄存器指向内核线程的引导程序
  469. regs.rip = (ul)kernel_thread_func;
  470. barrier();
  471. // kdebug("kernel_thread_func=%#018lx", kernel_thread_func);
  472. // kdebug("&kernel_thread_func=%#018lx", &kernel_thread_func);
  473. // kdebug("1111\tregs.rip = %#018lx", regs.rip);
  474. return do_fork(&regs, flags | CLONE_VM, 0, 0);
  475. }
  476. /**
  477. * @brief 初始化进程模块
  478. * ☆前置条件:已完成系统调用模块的初始化
  479. */
  480. void process_init()
  481. {
  482. kinfo("Initializing process...");
  483. initial_mm.pgd = (pml4t_t *)get_CR3();
  484. initial_mm.code_addr_start = memory_management_struct.kernel_code_start;
  485. initial_mm.code_addr_end = memory_management_struct.kernel_code_end;
  486. initial_mm.data_addr_start = (ul)&_data;
  487. initial_mm.data_addr_end = memory_management_struct.kernel_data_end;
  488. initial_mm.rodata_addr_start = (ul)&_rodata;
  489. initial_mm.rodata_addr_end = (ul)&_erodata;
  490. initial_mm.bss_start = (uint64_t)&_bss;
  491. initial_mm.bss_end = (uint64_t)&_ebss;
  492. initial_mm.brk_start = memory_management_struct.start_brk;
  493. initial_mm.brk_end = current_pcb->addr_limit;
  494. initial_mm.stack_start = _stack_start;
  495. initial_tss[proc_current_cpu_id].rsp0 = initial_thread.rbp;
  496. // ========= 在IDLE进程的顶层页表中添加对内核地址空间的映射 =====================
  497. // 由于IDLE进程的顶层页表的高地址部分会被后续进程所复制,为了使所有进程能够共享相同的内核空间,
  498. // 因此需要先在IDLE进程的顶层页表内映射二级页表
  499. uint64_t *idle_pml4t_vaddr = (uint64_t *)phys_2_virt((uint64_t)get_CR3() & (~0xfffUL));
  500. for (int i = 256; i < 512; ++i)
  501. {
  502. uint64_t *tmp = idle_pml4t_vaddr + i;
  503. barrier();
  504. if (*tmp == 0)
  505. {
  506. void *pdpt = kmalloc(PAGE_4K_SIZE, 0);
  507. barrier();
  508. memset(pdpt, 0, PAGE_4K_SIZE);
  509. barrier();
  510. set_pml4t(tmp, mk_pml4t(virt_2_phys(pdpt), PAGE_KERNEL_PGT));
  511. }
  512. }
  513. barrier();
  514. flush_tlb();
  515. /*
  516. kdebug("initial_thread.rbp=%#018lx", initial_thread.rbp);
  517. kdebug("initial_tss[0].rsp1=%#018lx", initial_tss[0].rsp1);
  518. kdebug("initial_tss[0].ist1=%#018lx", initial_tss[0].ist1);
  519. */
  520. // 初始化pid的写锁
  521. spin_init(&process_global_pid_write_lock);
  522. // 初始化进程的循环链表
  523. list_init(&initial_proc_union.pcb.list);
  524. barrier();
  525. kernel_thread(initial_kernel_thread, 10, CLONE_FS | CLONE_SIGNAL); // 初始化内核线程
  526. barrier();
  527. initial_proc_union.pcb.state = PROC_RUNNING;
  528. initial_proc_union.pcb.preempt_count = 0;
  529. initial_proc_union.pcb.cpu_id = 0;
  530. initial_proc_union.pcb.virtual_runtime = (1UL << 60);
  531. current_pcb->virtual_runtime = (1UL << 60);
  532. }
  533. /**
  534. * @brief fork当前进程
  535. *
  536. * @param regs 新的寄存器值
  537. * @param clone_flags 克隆标志
  538. * @param stack_start 堆栈开始地址
  539. * @param stack_size 堆栈大小
  540. * @return unsigned long
  541. */
  542. unsigned long do_fork(struct pt_regs *regs, unsigned long clone_flags, unsigned long stack_start, unsigned long stack_size)
  543. {
  544. int retval = 0;
  545. struct process_control_block *tsk = NULL;
  546. // 为新的进程分配栈空间,并将pcb放置在底部
  547. tsk = (struct process_control_block *)kmalloc(STACK_SIZE, 0);
  548. barrier();
  549. if (tsk == NULL)
  550. {
  551. retval = -ENOMEM;
  552. return retval;
  553. }
  554. barrier();
  555. memset(tsk, 0, sizeof(struct process_control_block));
  556. io_mfence();
  557. // 将当前进程的pcb复制到新的pcb内
  558. memcpy(tsk, current_pcb, sizeof(struct process_control_block));
  559. io_mfence();
  560. // 初始化进程的循环链表结点
  561. list_init(&tsk->list);
  562. io_mfence();
  563. // 判断是否为内核态调用fork
  564. if (current_pcb->flags & PF_KTHREAD && stack_start != 0)
  565. tsk->flags |= PF_KFORK;
  566. tsk->priority = 2;
  567. tsk->preempt_count = 0;
  568. // 增加全局的pid并赋值给新进程的pid
  569. spin_lock(&process_global_pid_write_lock);
  570. tsk->pid = process_global_pid++;
  571. barrier();
  572. // 加入到进程链表中
  573. tsk->next_pcb = initial_proc_union.pcb.next_pcb;
  574. barrier();
  575. initial_proc_union.pcb.next_pcb = tsk;
  576. barrier();
  577. tsk->parent_pcb = current_pcb;
  578. barrier();
  579. spin_unlock(&process_global_pid_write_lock);
  580. tsk->cpu_id = proc_current_cpu_id;
  581. tsk->state = PROC_UNINTERRUPTIBLE;
  582. tsk->parent_pcb = current_pcb;
  583. wait_queue_init(&tsk->wait_child_proc_exit, NULL);
  584. barrier();
  585. list_init(&tsk->list);
  586. retval = -ENOMEM;
  587. // 拷贝标志位
  588. if (process_copy_flags(clone_flags, tsk))
  589. goto copy_flags_failed;
  590. // 拷贝内存空间分布结构体
  591. if (process_copy_mm(clone_flags, tsk))
  592. goto copy_mm_failed;
  593. // 拷贝文件
  594. if (process_copy_files(clone_flags, tsk))
  595. goto copy_files_failed;
  596. // 拷贝线程结构体
  597. if (process_copy_thread(clone_flags, tsk, stack_start, stack_size, regs))
  598. goto copy_thread_failed;
  599. // 拷贝成功
  600. retval = tsk->pid;
  601. tsk->flags &= ~PF_KFORK;
  602. // 唤醒进程
  603. process_wakeup(tsk);
  604. return retval;
  605. copy_thread_failed:;
  606. // 回收线程
  607. process_exit_thread(tsk);
  608. copy_files_failed:;
  609. // 回收文件
  610. process_exit_files(tsk);
  611. copy_mm_failed:;
  612. // 回收内存空间分布结构体
  613. process_exit_mm(tsk);
  614. copy_flags_failed:;
  615. kfree(tsk);
  616. return retval;
  617. return 0;
  618. }
  619. /**
  620. * @brief 根据pid获取进程的pcb
  621. *
  622. * @param pid
  623. * @return struct process_control_block*
  624. */
  625. struct process_control_block *process_get_pcb(long pid)
  626. {
  627. struct process_control_block *pcb = initial_proc_union.pcb.next_pcb;
  628. // 使用蛮力法搜索指定pid的pcb
  629. // todo: 使用哈希表来管理pcb
  630. for (; pcb != &initial_proc_union.pcb; pcb = pcb->next_pcb)
  631. {
  632. if (pcb->pid == pid)
  633. return pcb;
  634. }
  635. return NULL;
  636. }
  637. /**
  638. * @brief 将进程加入到调度器的就绪队列中
  639. *
  640. * @param pcb 进程的pcb
  641. */
  642. void process_wakeup(struct process_control_block *pcb)
  643. {
  644. pcb->state = PROC_RUNNING;
  645. sched_cfs_enqueue(pcb);
  646. }
  647. /**
  648. * @brief 将进程加入到调度器的就绪队列中,并标志当前进程需要被调度
  649. *
  650. * @param pcb 进程的pcb
  651. */
  652. void process_wakeup_immediately(struct process_control_block *pcb)
  653. {
  654. pcb->state = PROC_RUNNING;
  655. sched_cfs_enqueue(pcb);
  656. // 将当前进程标志为需要调度,缩短新进程被wakeup的时间
  657. current_pcb->flags |= PF_NEED_SCHED;
  658. }
  659. /**
  660. * @brief 拷贝当前进程的标志位
  661. *
  662. * @param clone_flags 克隆标志位
  663. * @param pcb 新的进程的pcb
  664. * @return uint64_t
  665. */
  666. uint64_t process_copy_flags(uint64_t clone_flags, struct process_control_block *pcb)
  667. {
  668. if (clone_flags & CLONE_VM)
  669. pcb->flags |= PF_VFORK;
  670. return 0;
  671. }
  672. /**
  673. * @brief 拷贝当前进程的文件描述符等信息
  674. *
  675. * @param clone_flags 克隆标志位
  676. * @param pcb 新的进程的pcb
  677. * @return uint64_t
  678. */
  679. uint64_t process_copy_files(uint64_t clone_flags, struct process_control_block *pcb)
  680. {
  681. int retval = 0;
  682. // 如果CLONE_FS被置位,那么子进程与父进程共享文件描述符
  683. // 文件描述符已经在复制pcb时被拷贝
  684. if (clone_flags & CLONE_FS)
  685. return retval;
  686. // 为新进程拷贝新的文件描述符
  687. for (int i = 0; i < PROC_MAX_FD_NUM; ++i)
  688. {
  689. if (current_pcb->fds[i] == NULL)
  690. continue;
  691. pcb->fds[i] = (struct vfs_file_t *)kmalloc(sizeof(struct vfs_file_t), 0);
  692. memcpy(pcb->fds[i], current_pcb->fds[i], sizeof(struct vfs_file_t));
  693. }
  694. return retval;
  695. }
  696. /**
  697. * @brief 回收进程的所有文件描述符
  698. *
  699. * @param pcb 要被回收的进程的pcb
  700. * @return uint64_t
  701. */
  702. uint64_t process_exit_files(struct process_control_block *pcb)
  703. {
  704. // 不与父进程共享文件描述符
  705. if (!(pcb->flags & PF_VFORK))
  706. {
  707. for (int i = 0; i < PROC_MAX_FD_NUM; ++i)
  708. {
  709. if (pcb->fds[i] == NULL)
  710. continue;
  711. kfree(pcb->fds[i]);
  712. }
  713. }
  714. // 清空当前进程的文件描述符列表
  715. memset(pcb->fds, 0, sizeof(struct vfs_file_t *) * PROC_MAX_FD_NUM);
  716. }
  717. /**
  718. * @brief 拷贝当前进程的内存空间分布结构体信息
  719. *
  720. * @param clone_flags 克隆标志位
  721. * @param pcb 新的进程的pcb
  722. * @return uint64_t
  723. */
  724. uint64_t process_copy_mm(uint64_t clone_flags, struct process_control_block *pcb)
  725. {
  726. int retval = 0;
  727. // 与父进程共享内存空间
  728. if (clone_flags & CLONE_VM)
  729. {
  730. pcb->mm = current_pcb->mm;
  731. return retval;
  732. }
  733. // 分配新的内存空间分布结构体
  734. struct mm_struct *new_mms = (struct mm_struct *)kmalloc(sizeof(struct mm_struct), 0);
  735. memset(new_mms, 0, sizeof(struct mm_struct));
  736. memcpy(new_mms, current_pcb->mm, sizeof(struct mm_struct));
  737. pcb->mm = new_mms;
  738. // 分配顶层页表, 并设置顶层页表的物理地址
  739. new_mms->pgd = (pml4t_t *)virt_2_phys(kmalloc(PAGE_4K_SIZE, 0));
  740. // 由于高2K部分为内核空间,在接下来需要覆盖其数据,因此不用清零
  741. memset(phys_2_virt(new_mms->pgd), 0, PAGE_4K_SIZE / 2);
  742. // 拷贝内核空间的页表指针
  743. memcpy(phys_2_virt(new_mms->pgd) + 256, phys_2_virt(initial_proc[proc_current_cpu_id]->mm->pgd) + 256, PAGE_4K_SIZE / 2);
  744. uint64_t *current_pgd = (uint64_t *)phys_2_virt(current_pcb->mm->pgd);
  745. uint64_t *new_pml4t = (uint64_t *)phys_2_virt(new_mms->pgd);
  746. // 迭代地拷贝用户空间
  747. for (int i = 0; i <= 255; ++i)
  748. {
  749. // 当前页表项为空
  750. if ((*(uint64_t *)(current_pgd + i)) == 0)
  751. continue;
  752. // 分配新的二级页表
  753. uint64_t *new_pdpt = (uint64_t *)kmalloc(PAGE_4K_SIZE, 0);
  754. memset(new_pdpt, 0, PAGE_4K_SIZE);
  755. // 在新的一级页表中设置新的二级页表表项
  756. set_pml4t(new_pml4t + i, mk_pml4t(virt_2_phys(new_pdpt), (*(current_pgd + i)) & 0xfffUL));
  757. uint64_t *current_pdpt = (uint64_t *)phys_2_virt((*(uint64_t *)(current_pgd + i)) & (~0xfffUL));
  758. // kdebug("current_pdpt=%#018lx, current_pid=%d", current_pdpt, current_pcb->pid);
  759. for (int j = 0; j < 512; ++j)
  760. {
  761. if (*(current_pdpt + j) == 0)
  762. continue;
  763. // 分配新的三级页表
  764. uint64_t *new_pdt = (uint64_t *)kmalloc(PAGE_4K_SIZE, 0);
  765. memset(new_pdt, 0, PAGE_4K_SIZE);
  766. // 在二级页表中填写新的三级页表
  767. // 在新的二级页表中设置三级页表的表项
  768. set_pdpt((uint64_t *)(new_pdpt + j), mk_pdpt(virt_2_phys(new_pdt), (*(current_pdpt + j)) & 0xfffUL));
  769. uint64_t *current_pdt = (uint64_t *)phys_2_virt((*(current_pdpt + j)) & (~0xfffUL));
  770. // kdebug("current_pdt=%#018lx", current_pdt);
  771. // 循环拷贝三级页表
  772. for (int k = 0; k < 512; ++k)
  773. {
  774. if (*(current_pdt + k) == 0)
  775. continue;
  776. // 获取新的物理页
  777. uint64_t pa = alloc_pages(ZONE_NORMAL, 1, PAGE_PGT_MAPPED)->addr_phys;
  778. memset((void *)phys_2_virt(pa), 0, PAGE_2M_SIZE);
  779. set_pdt((uint64_t *)(new_pdt + k), mk_pdt(pa, *(current_pdt + k) & 0x1ffUL));
  780. // 拷贝数据
  781. memcpy(phys_2_virt(pa), phys_2_virt((*(current_pdt + k)) & (~0x1ffUL)), PAGE_2M_SIZE);
  782. }
  783. }
  784. }
  785. return retval;
  786. }
  787. /**
  788. * @brief 释放进程的页表
  789. *
  790. * @param pcb 要被释放页表的进程
  791. * @return uint64_t
  792. */
  793. uint64_t process_exit_mm(struct process_control_block *pcb)
  794. {
  795. if (pcb->flags & CLONE_VM)
  796. return 0;
  797. if (pcb->mm == NULL)
  798. {
  799. kdebug("pcb->mm==NULL");
  800. return 0;
  801. }
  802. if (pcb->mm->pgd == NULL)
  803. {
  804. kdebug("pcb->mm->pgd==NULL");
  805. return 0;
  806. }
  807. // 获取顶层页表
  808. pml4t_t *current_pgd = (pml4t_t *)phys_2_virt(pcb->mm->pgd);
  809. // 迭代地释放用户空间
  810. for (int i = 0; i <= 255; ++i)
  811. {
  812. // 当前页表项为空
  813. if ((current_pgd + i)->pml4t == 0)
  814. continue;
  815. // 二级页表entry
  816. pdpt_t *current_pdpt = (pdpt_t *)phys_2_virt((current_pgd + i)->pml4t & (~0xfffUL));
  817. // 遍历二级页表
  818. for (int j = 0; j < 512; ++j)
  819. {
  820. if ((current_pdpt + j)->pdpt == 0)
  821. continue;
  822. // 三级页表的entry
  823. pdt_t *current_pdt = (pdt_t *)phys_2_virt((current_pdpt + j)->pdpt & (~0xfffUL));
  824. // 释放三级页表的内存页
  825. for (int k = 0; k < 512; ++k)
  826. {
  827. if ((current_pdt + k)->pdt == 0)
  828. continue;
  829. // 存在4级页表
  830. if (unlikely(((current_pdt + k)->pdt & (1 << 7)) == 0))
  831. {
  832. // 存在4K页
  833. uint64_t *pt_ptr = (uint64_t *)phys_2_virt((current_pdt + k)->pdt & (~0x1fffUL));
  834. uint64_t *pte_ptr = pt_ptr;
  835. // 循环处理4K页表, 直接清空
  836. // todo: 当支持使用slab分配4K内存作为进程的4K页之后,在这里需要释放这些4K对象
  837. for (int16_t g = 0; g < 512; ++g, ++pte_ptr)
  838. *pte_ptr = 0;
  839. // 4级页表已经空了,释放页表
  840. if (unlikely(mm_check_page_table(pt_ptr)) == 0)
  841. kfree(pt_ptr);
  842. }
  843. else
  844. {
  845. // 释放内存页
  846. if (mm_is_2M_page((current_pdt + k)->pdt & (~0x1fffUL))) // 校验是否为内存中的物理页
  847. free_pages(Phy_to_2M_Page((current_pdt + k)->pdt & (~0x1fffUL)), 1);
  848. }
  849. }
  850. // 释放三级页表
  851. kfree(current_pdt);
  852. }
  853. // 释放二级页表
  854. kfree(current_pdpt);
  855. }
  856. // 释放顶层页表
  857. kfree(current_pgd);
  858. // 释放内存空间分布结构体
  859. kfree(pcb->mm);
  860. return 0;
  861. }
  862. /**
  863. * @brief 重写内核栈中的rbp地址
  864. *
  865. * @param new_regs 子进程的reg
  866. * @param new_pcb 子进程的pcb
  867. * @return int
  868. */
  869. static int process_rewrite_rbp(struct pt_regs *new_regs, struct process_control_block *new_pcb)
  870. {
  871. uint64_t new_top = ((uint64_t)new_pcb) + STACK_SIZE;
  872. uint64_t old_top = (uint64_t)(current_pcb) + STACK_SIZE;
  873. uint64_t *rbp = &new_regs->rbp;
  874. uint64_t *tmp = rbp;
  875. // 超出内核栈范围
  876. if ((uint64_t)*rbp >= old_top || (uint64_t)*rbp < (old_top - STACK_SIZE))
  877. return 0;
  878. while (1)
  879. {
  880. // 计算delta
  881. uint64_t delta = old_top - *rbp;
  882. // 计算新的rbp值
  883. uint64_t newVal = new_top - delta;
  884. // 新的值不合法
  885. if (unlikely((uint64_t)newVal >= new_top || (uint64_t)newVal < (new_top - STACK_SIZE)))
  886. break;
  887. // 将新的值写入对应位置
  888. *rbp = newVal;
  889. // 跳转栈帧
  890. rbp = (uint64_t *)*rbp;
  891. }
  892. // 设置内核态fork返回到enter_syscall_int()函数内的时候,rsp寄存器的值
  893. new_regs->rsp = new_top - (old_top - new_regs->rsp);
  894. return 0;
  895. }
  896. /**
  897. * @brief 拷贝当前进程的线程结构体
  898. *
  899. * @param clone_flags 克隆标志位
  900. * @param pcb 新的进程的pcb
  901. * @return uint64_t
  902. */
  903. uint64_t process_copy_thread(uint64_t clone_flags, struct process_control_block *pcb, uint64_t stack_start, uint64_t stack_size, struct pt_regs *current_regs)
  904. {
  905. // 将线程结构体放置在pcb后方
  906. struct thread_struct *thd = (struct thread_struct *)(pcb + 1);
  907. memset(thd, 0, sizeof(struct thread_struct));
  908. pcb->thread = thd;
  909. struct pt_regs *child_regs = NULL;
  910. // 拷贝栈空间
  911. if (pcb->flags & PF_KFORK) // 内核态下的fork
  912. {
  913. // 内核态下则拷贝整个内核栈
  914. uint32_t size = ((uint64_t)current_pcb) + STACK_SIZE - (uint64_t)(current_regs);
  915. child_regs = (struct pt_regs *)(((uint64_t)pcb) + STACK_SIZE - size);
  916. memcpy(child_regs, (void *)current_regs, size);
  917. barrier();
  918. // 然后重写新的栈中,每个栈帧的rbp值
  919. process_rewrite_rbp(child_regs, pcb);
  920. }
  921. else
  922. {
  923. child_regs = (struct pt_regs *)((uint64_t)pcb + STACK_SIZE - sizeof(struct pt_regs));
  924. memcpy(child_regs, current_regs, sizeof(struct pt_regs));
  925. barrier();
  926. child_regs->rsp = stack_start;
  927. }
  928. // 设置子进程的返回值为0
  929. child_regs->rax = 0;
  930. if (pcb->flags & PF_KFORK)
  931. thd->rbp = (uint64_t)(child_regs + 1); // 设置新的内核线程开始执行时的rbp(也就是进入ret_from_system_call时的rbp)
  932. else
  933. thd->rbp = (uint64_t)pcb + STACK_SIZE;
  934. // 设置新的内核线程开始执行的时候的rsp
  935. thd->rsp = (uint64_t)child_regs;
  936. thd->fs = current_pcb->thread->fs;
  937. thd->gs = current_pcb->thread->gs;
  938. // 根据是否为内核线程、是否在内核态fork,设置进程的开始执行的地址
  939. if (pcb->flags & PF_KFORK)
  940. thd->rip = (uint64_t)ret_from_system_call;
  941. else if (pcb->flags & PF_KTHREAD && (!(pcb->flags & PF_KFORK)))
  942. thd->rip = (uint64_t)kernel_thread_func;
  943. else
  944. thd->rip = (uint64_t)ret_from_system_call;
  945. return 0;
  946. }
/**
 * @brief todo: reclaim the thread structure of a process.
 *        Not implemented yet - the body is intentionally empty.
 *
 * @param pcb pcb of the process whose thread structure is to be reclaimed
 */
void process_exit_thread(struct process_control_block *pcb)
{
}
  955. // #pragma GCC pop_options