// process.c
  1. #include "process.h"
  2. #include <common/printk.h>
  3. #include <common/kprint.h>
  4. #include <common/stdio.h>
  5. #include <common/string.h>
  6. #include <common/compiler.h>
  7. #include <common/libELF/elf.h>
  8. #include <common/time.h>
  9. #include <common/sys/wait.h>
  10. #include <driver/video/video.h>
  11. #include <driver/usb/usb.h>
  12. #include <exception/gate.h>
  13. #include <filesystem/fat32/fat32.h>
  14. #include <filesystem/devfs/devfs.h>
  15. #include <mm/slab.h>
  16. #include <common/spinlock.h>
  17. #include <syscall/syscall.h>
  18. #include <syscall/syscall_num.h>
  19. #include <sched/sched.h>
  20. #include <common/unistd.h>
  21. #include <debug/traceback/traceback.h>
  22. #include <driver/disk/ahci/ahci.h>
  23. #include <ktest/ktest.h>
  24. #include <mm/mmio.h>
  25. #include <common/lz4.h>
  26. // #pragma GCC push_options
  27. // #pragma GCC optimize("O0")
spinlock_t process_global_pid_write_lock; // Write lock taken while incrementing the pid counter
long process_global_pid = 1;              // Largest pid in the system; do_fork() assigns this value and then increments it
extern void system_call(void);
extern void kernel_thread_func(void);
ul _stack_start; // Stack base address (virtual address) of the initial proc
extern struct mm_struct initial_mm;
// Thread state of the initial process: rbp/rsp start at the top of the stack
// embedded in initial_proc_union, and fs/gs hold the kernel data selector.
struct thread_struct initial_thread =
    {
        .rbp = (ul)(initial_proc_union.stack + STACK_SIZE / sizeof(ul)),
        .rsp = (ul)(initial_proc_union.stack + STACK_SIZE / sizeof(ul)),
        .fs = KERNEL_DS,
        .gs = KERNEL_DS,
        .cr2 = 0,
        .trap_num = 0,
        .err_code = 0};
// Initialize the union of the initial process and link it into the .data.init_proc_union section
union proc_union initial_proc_union __attribute__((__section__(".data.init_proc_union"))) = {INITIAL_PROC(initial_proc_union.pcb)};
// Per-CPU pointer to the initial process; only slot 0 is filled in statically.
struct process_control_block *initial_proc[MAX_CPU_NUM] = {&initial_proc_union.pcb, 0};
// Initialize the initial process's TSS for every core
struct tss_struct initial_tss[MAX_CPU_NUM] = {[0 ... MAX_CPU_NUM - 1] = INITIAL_TSS};
/**
 * @brief Copy the flags of the current process
 *
 * @param clone_flags clone flags
 * @param pcb pcb of the new process
 * @return uint64_t
 */
uint64_t process_copy_flags(uint64_t clone_flags, struct process_control_block *pcb);
/**
 * @brief Copy the file descriptors (and related information) of the current process
 *
 * @param clone_flags clone flags
 * @param pcb pcb of the new process
 * @return uint64_t
 */
uint64_t process_copy_files(uint64_t clone_flags, struct process_control_block *pcb);
/**
 * @brief Reclaim all file descriptors of a process
 *
 * @param pcb pcb of the process being reclaimed
 * @return uint64_t
 */
uint64_t process_exit_files(struct process_control_block *pcb);
/**
 * @brief Copy the memory-layout structure (mm_struct) of the current process
 *
 * @param clone_flags clone flags
 * @param pcb pcb of the new process
 * @return uint64_t
 */
uint64_t process_copy_mm(uint64_t clone_flags, struct process_control_block *pcb);
/**
 * @brief Release the page tables of a process
 *
 * @param pcb process whose page tables are released
 * @return uint64_t
 */
uint64_t process_exit_mm(struct process_control_block *pcb);
/**
 * @brief Copy the thread structure of the current process
 *
 * @param clone_flags clone flags
 * @param pcb pcb of the new process
 * @param stack_start stack start address, as passed to do_fork()
 * @param stack_size stack size, as passed to do_fork()
 * @param current_regs register set passed to do_fork()
 * @return uint64_t
 */
uint64_t process_copy_thread(uint64_t clone_flags, struct process_control_block *pcb, uint64_t stack_start, uint64_t stack_size, struct pt_regs *current_regs);
// Counterpart of process_copy_thread(); called when a process exits and on the
// do_fork() failure path. The definition is not in this part of the file.
void process_exit_thread(struct process_control_block *pcb);
/**
 * @brief Switch between processes
 *
 * @param prev pcb of the previous process
 * @param next pcb of the process being switched to
 * The registers were already saved when the program entered the kernel, so they
 * do not have to be saved again here.
 * Only the fs and gs registers are switched here.
 */
#pragma GCC push_options
#pragma GCC optimize("O0")
void __switch_to(struct process_control_block *prev, struct process_control_block *next)
{
    // Point the current CPU's TSS rsp0 at the next process's thread->rbp
    initial_tss[proc_current_cpu_id].rsp0 = next->thread->rbp;
    // kdebug("next_rsp = %#018lx   ", next->thread->rsp);
    // set_tss64((uint *)phys_2_virt(TSS64_Table), initial_tss[0].rsp0, initial_tss[0].rsp1, initial_tss[0].rsp2, initial_tss[0].ist1,
    //           initial_tss[0].ist2, initial_tss[0].ist3, initial_tss[0].ist4, initial_tss[0].ist5, initial_tss[0].ist6, initial_tss[0].ist7);
    // Save the outgoing process's fs/gs into its thread structure ...
    __asm__ __volatile__("movq	%%fs,	%0 \n\t"
                         : "=a"(prev->thread->fs));
    __asm__ __volatile__("movq	%%gs,	%0 \n\t"
                         : "=a"(prev->thread->gs));
    // ... and load the incoming process's fs/gs.
    __asm__ __volatile__("movq	%0,	%%fs \n\t" ::"a"(next->thread->fs));
    __asm__ __volatile__("movq	%0,	%%gs \n\t" ::"a"(next->thread->gs));
}
#pragma GCC pop_options
  119. /**
  120. * @brief 打开要执行的程序文件
  121. *
  122. * @param path
  123. * @return struct vfs_file_t*
  124. */
  125. struct vfs_file_t *process_open_exec_file(char *path)
  126. {
  127. struct vfs_dir_entry_t *dentry = NULL;
  128. struct vfs_file_t *filp = NULL;
  129. dentry = vfs_path_walk(path, 0);
  130. if (dentry == NULL)
  131. return (void *)-ENOENT;
  132. if (dentry->dir_inode->attribute == VFS_IF_DIR)
  133. return (void *)-ENOTDIR;
  134. filp = (struct vfs_file_t *)kmalloc(sizeof(struct vfs_file_t), 0);
  135. if (filp == NULL)
  136. return (void *)-ENOMEM;
  137. filp->position = 0;
  138. filp->mode = 0;
  139. filp->dEntry = dentry;
  140. filp->mode = ATTR_READ_ONLY;
  141. filp->file_ops = dentry->dir_inode->file_ops;
  142. return filp;
  143. }
  144. /**
  145. * @brief 加载elf格式的程序文件到内存中,并设置regs
  146. *
  147. * @param regs 寄存器
  148. * @param path 文件路径
  149. * @return int
  150. */
  151. static int process_load_elf_file(struct pt_regs *regs, char *path)
  152. {
  153. int retval = 0;
  154. struct vfs_file_t *filp = process_open_exec_file(path);
  155. if ((long)filp <= 0 && (long)filp >= -255)
  156. {
  157. // kdebug("(long)filp=%ld", (long)filp);
  158. return (unsigned long)filp;
  159. }
  160. void *buf = kmalloc(PAGE_4K_SIZE, 0);
  161. memset(buf, 0, PAGE_4K_SIZE);
  162. uint64_t pos = 0;
  163. pos = filp->file_ops->lseek(filp, 0, SEEK_SET);
  164. retval = filp->file_ops->read(filp, (char *)buf, sizeof(Elf64_Ehdr), &pos);
  165. retval = 0;
  166. if (!elf_check(buf))
  167. {
  168. kerror("Not an ELF file: %s", path);
  169. retval = -ENOTSUP;
  170. goto load_elf_failed;
  171. }
  172. #if ARCH(X86_64)
  173. // 暂时只支持64位的文件
  174. if (((Elf32_Ehdr *)buf)->e_ident[EI_CLASS] != ELFCLASS64)
  175. {
  176. kdebug("((Elf32_Ehdr *)buf)->e_ident[EI_CLASS]=%d", ((Elf32_Ehdr *)buf)->e_ident[EI_CLASS]);
  177. retval = -EUNSUPPORTED;
  178. goto load_elf_failed;
  179. }
  180. Elf64_Ehdr ehdr = *(Elf64_Ehdr *)buf;
  181. // 暂时只支持AMD64架构
  182. if (ehdr.e_machine != EM_AMD64)
  183. {
  184. kerror("e_machine=%d", ehdr.e_machine);
  185. retval = -EUNSUPPORTED;
  186. goto load_elf_failed;
  187. }
  188. #else
  189. #error Unsupported architecture!
  190. #endif
  191. if (ehdr.e_type != ET_EXEC)
  192. {
  193. kerror("Not executable file! filename=%s\tehdr->e_type=%d", path, ehdr.e_type);
  194. retval = -EUNSUPPORTED;
  195. goto load_elf_failed;
  196. }
  197. // kdebug("filename=%s:\te_entry=%#018lx", path, ehdr.e_entry);
  198. regs->rip = ehdr.e_entry;
  199. current_pcb->mm->code_addr_start = ehdr.e_entry;
  200. // kdebug("ehdr.e_phoff=%#018lx\t ehdr.e_phentsize=%d, ehdr.e_phnum=%d", ehdr.e_phoff, ehdr.e_phentsize, ehdr.e_phnum);
  201. // 将指针移动到program header处
  202. pos = ehdr.e_phoff;
  203. // 读取所有的phdr
  204. pos = filp->file_ops->lseek(filp, pos, SEEK_SET);
  205. filp->file_ops->read(filp, (char *)buf, (uint64_t)ehdr.e_phentsize * (uint64_t)ehdr.e_phnum, &pos);
  206. if ((unsigned long)filp <= 0)
  207. {
  208. kdebug("(unsigned long)filp=%d", (long)filp);
  209. retval = -ENOEXEC;
  210. goto load_elf_failed;
  211. }
  212. Elf64_Phdr *phdr = buf;
  213. // 将程序加载到内存中
  214. for (int i = 0; i < ehdr.e_phnum; ++i, ++phdr)
  215. {
  216. // kdebug("phdr[%d] phdr->p_offset=%#018lx phdr->p_vaddr=%#018lx phdr->p_memsz=%ld phdr->p_filesz=%ld phdr->p_type=%d", i, phdr->p_offset, phdr->p_vaddr, phdr->p_memsz, phdr->p_filesz, phdr->p_type);
  217. // 不是可加载的段
  218. if (phdr->p_type != PT_LOAD)
  219. continue;
  220. int64_t remain_mem_size = phdr->p_memsz;
  221. int64_t remain_file_size = phdr->p_filesz;
  222. pos = phdr->p_offset;
  223. uint64_t virt_base = 0;
  224. uint64_t beginning_offset = 0; // 由于页表映射导致的virtbase与实际的p_vaddr之间的偏移量
  225. if (remain_mem_size >= PAGE_2M_SIZE) // 接下来存在映射2M页的情况,因此将vaddr按2M向下对齐
  226. virt_base = phdr->p_vaddr & PAGE_2M_MASK;
  227. else // 接下来只有4K页的映射
  228. virt_base = phdr->p_vaddr & PAGE_4K_MASK;
  229. beginning_offset = phdr->p_vaddr - virt_base;
  230. remain_mem_size += beginning_offset;
  231. while (remain_mem_size > 0)
  232. {
  233. // kdebug("loading...");
  234. int64_t map_size = 0;
  235. if (remain_mem_size >= PAGE_2M_SIZE)
  236. {
  237. uint64_t pa = alloc_pages(ZONE_NORMAL, 1, PAGE_PGT_MAPPED)->addr_phys;
  238. struct vm_area_struct *vma = NULL;
  239. int ret = mm_create_vma(current_pcb->mm, virt_base, PAGE_2M_SIZE, VM_USER | VM_ACCESS_FLAGS, NULL, &vma);
  240. // 防止内存泄露
  241. if (ret == -EEXIST)
  242. free_pages(Phy_to_2M_Page(pa), 1);
  243. else
  244. mm_map_vma(vma, pa);
  245. io_mfence();
  246. memset((void *)virt_base, 0, PAGE_2M_SIZE);
  247. map_size = PAGE_2M_SIZE;
  248. }
  249. else
  250. {
  251. // todo: 使用4K、8K、32K大小内存块混合进行分配,提高空间利用率(减少了bmp的大小)
  252. map_size = ALIGN(remain_mem_size, PAGE_4K_SIZE);
  253. // 循环分配4K大小内存块
  254. for (uint64_t off = 0; off < map_size; off += PAGE_4K_SIZE)
  255. {
  256. uint64_t paddr = virt_2_phys((uint64_t)kmalloc(PAGE_4K_SIZE, 0));
  257. struct vm_area_struct *vma = NULL;
  258. int val = mm_create_vma(current_pcb->mm, virt_base + off, PAGE_4K_SIZE, VM_USER | VM_ACCESS_FLAGS, NULL, &vma);
  259. if (val == -EEXIST)
  260. kfree(phys_2_virt(paddr));
  261. else
  262. mm_map_vma(vma, paddr);
  263. io_mfence();
  264. memset((void *)(virt_base + off), 0, PAGE_4K_SIZE);
  265. }
  266. }
  267. pos = filp->file_ops->lseek(filp, pos, SEEK_SET);
  268. int64_t val = 0;
  269. if (remain_file_size > 0)
  270. {
  271. int64_t to_trans = (remain_file_size > PAGE_2M_SIZE) ? PAGE_2M_SIZE : remain_file_size;
  272. val = filp->file_ops->read(filp, (char *)(virt_base + beginning_offset), to_trans, &pos);
  273. }
  274. if (val < 0)
  275. goto load_elf_failed;
  276. remain_mem_size -= map_size;
  277. remain_file_size -= val;
  278. virt_base += map_size;
  279. }
  280. }
  281. // 分配2MB的栈内存空间
  282. regs->rsp = current_pcb->mm->stack_start;
  283. regs->rbp = current_pcb->mm->stack_start;
  284. {
  285. struct vm_area_struct *vma = NULL;
  286. uint64_t pa = alloc_pages(ZONE_NORMAL, 1, PAGE_PGT_MAPPED)->addr_phys;
  287. int val = mm_create_vma(current_pcb->mm, current_pcb->mm->stack_start - PAGE_2M_SIZE, PAGE_2M_SIZE, VM_USER | VM_ACCESS_FLAGS, NULL, &vma);
  288. if (val == -EEXIST)
  289. free_pages(Phy_to_2M_Page(pa), 1);
  290. else
  291. mm_map_vma(vma, pa);
  292. }
  293. // 清空栈空间
  294. memset((void *)(current_pcb->mm->stack_start - PAGE_2M_SIZE), 0, PAGE_2M_SIZE);
  295. load_elf_failed:;
  296. if (buf != NULL)
  297. kfree(buf);
  298. return retval;
  299. }
  300. /**
  301. * @brief 使当前进程去执行新的代码
  302. *
  303. * @param regs 当前进程的寄存器
  304. * @param path 可执行程序的路径
  305. * @param argv 参数列表
  306. * @param envp 环境变量
  307. * @return ul 错误码
  308. */
  309. #pragma GCC push_options
  310. #pragma GCC optimize("O0")
  311. ul do_execve(struct pt_regs *regs, char *path, char *argv[], char *envp[])
  312. {
  313. // kdebug("do_execve is running...");
  314. // 当前进程正在与父进程共享地址空间,需要创建
  315. // 独立的地址空间才能使新程序正常运行
  316. if (current_pcb->flags & PF_VFORK)
  317. {
  318. kdebug("proc:%d creating new mem space", current_pcb->pid);
  319. // 分配新的内存空间分布结构体
  320. struct mm_struct *new_mms = (struct mm_struct *)kmalloc(sizeof(struct mm_struct), 0);
  321. memset(new_mms, 0, sizeof(struct mm_struct));
  322. current_pcb->mm = new_mms;
  323. // 分配顶层页表, 并设置顶层页表的物理地址
  324. new_mms->pgd = (pml4t_t *)virt_2_phys(kmalloc(PAGE_4K_SIZE, 0));
  325. // 由于高2K部分为内核空间,在接下来需要覆盖其数据,因此不用清零
  326. memset(phys_2_virt(new_mms->pgd), 0, PAGE_4K_SIZE / 2);
  327. // 拷贝内核空间的页表指针
  328. memcpy(phys_2_virt(new_mms->pgd) + 256, phys_2_virt(initial_proc[proc_current_cpu_id]) + 256, PAGE_4K_SIZE / 2);
  329. }
  330. // 设置用户栈和用户堆的基地址
  331. unsigned long stack_start_addr = 0x6ffff0a00000UL;
  332. const uint64_t brk_start_addr = 0x700000000000UL;
  333. process_switch_mm(current_pcb);
  334. // 为用户态程序设置地址边界
  335. if (!(current_pcb->flags & PF_KTHREAD))
  336. current_pcb->addr_limit = USER_MAX_LINEAR_ADDR;
  337. current_pcb->mm->code_addr_end = 0;
  338. current_pcb->mm->data_addr_start = 0;
  339. current_pcb->mm->data_addr_end = 0;
  340. current_pcb->mm->rodata_addr_start = 0;
  341. current_pcb->mm->rodata_addr_end = 0;
  342. current_pcb->mm->bss_start = 0;
  343. current_pcb->mm->bss_end = 0;
  344. current_pcb->mm->brk_start = brk_start_addr;
  345. current_pcb->mm->brk_end = brk_start_addr;
  346. current_pcb->mm->stack_start = stack_start_addr;
  347. // 关闭之前的文件描述符
  348. process_exit_files(current_pcb);
  349. // 清除进程的vfork标志位
  350. current_pcb->flags &= ~PF_VFORK;
  351. // 加载elf格式的可执行文件
  352. int tmp = process_load_elf_file(regs, path);
  353. if (tmp < 0)
  354. goto exec_failed;
  355. // 拷贝参数列表
  356. if (argv != NULL)
  357. {
  358. int argc = 0;
  359. // 目标程序的argv基地址指针,最大8个参数
  360. char **dst_argv = (char **)(stack_start_addr - (sizeof(char **) << 3));
  361. uint64_t str_addr = (uint64_t)dst_argv;
  362. for (argc = 0; argc < 8 && argv[argc] != NULL; ++argc)
  363. {
  364. if (*argv[argc] == NULL)
  365. break;
  366. // 测量参数的长度(最大1023)
  367. int argv_len = strnlen_user(argv[argc], 1023) + 1;
  368. strncpy((char *)(str_addr - argv_len), argv[argc], argv_len - 1);
  369. str_addr -= argv_len;
  370. dst_argv[argc] = (char *)str_addr;
  371. // 字符串加上结尾字符
  372. ((char *)str_addr)[argv_len] = '\0';
  373. }
  374. // 重新设定栈基址,并预留空间防止越界
  375. stack_start_addr = str_addr - 8;
  376. current_pcb->mm->stack_start = stack_start_addr;
  377. regs->rsp = regs->rbp = stack_start_addr;
  378. // 传递参数
  379. regs->rdi = argc;
  380. regs->rsi = (uint64_t)dst_argv;
  381. }
  382. // kdebug("execve ok");
  383. regs->cs = USER_CS | 3;
  384. regs->ds = USER_DS | 3;
  385. regs->ss = USER_DS | 0x3;
  386. regs->rflags = 0x200246;
  387. regs->rax = 1;
  388. regs->es = 0;
  389. return 0;
  390. exec_failed:;
  391. process_do_exit(tmp);
  392. }
  393. #pragma GCC pop_options
/**
 * @brief Kernel init process
 *
 * Initializes the disk and filesystem drivers, starts the USB initialization and the
 * kernel unit tests in separate threads, waits for them, and then jumps into
 * do_execve() to run /shell.elf.
 *
 * @param arg argument (not used by the body)
 * @return ul 1 (the return statement sits after the jump to do_execve)
 */
#pragma GCC push_options
#pragma GCC optimize("O0")
ul initial_kernel_thread(ul arg)
{
    // kinfo("initial proc running...\targ:%#018lx", arg);
    ahci_init();
    fat32_init();
    rootfs_umount();
    // Use a separate kernel thread to initialize the USB driver
    int usb_pid = kernel_thread(usb_init, 0, 0);
    kinfo("LZ4 lib Version=%s", LZ4_versionString());
    // Run unit tests for some components
    uint64_t tpid[] = {
        ktest_start(ktest_test_bitree, 0),
        ktest_start(ktest_test_kfifo, 0),
        ktest_start(ktest_test_mutex, 0),
        usb_pid,
    };
    kinfo("Waiting test thread exit...");
    // Wait for the test threads (and the USB init thread) to exit
    for (int i = 0; i < sizeof(tpid) / sizeof(uint64_t); ++i)
        waitpid(tpid[i], NULL, NULL);
    kinfo("All test done.");
    // Prepare to switch to user mode
    struct pt_regs *regs;
    // If an interrupt fires during the code below, returning from it would use a wrong
    // segment selector and trigger #GP, so interrupts are disabled here
    cli();
    current_pcb->thread->rip = (ul)ret_from_system_call;
    // The pt_regs frame sits at the very top of this process's kernel stack
    current_pcb->thread->rsp = (ul)current_pcb + STACK_SIZE - sizeof(struct pt_regs);
    current_pcb->thread->fs = USER_DS | 0x3;
    barrier();
    current_pcb->thread->gs = USER_DS | 0x3;
    // Voluntarily give up the kernel-thread identity
    current_pcb->flags &= (~PF_KTHREAD);
    kdebug("in initial_kernel_thread: flags=%ld", current_pcb->flags);
    regs = (struct pt_regs *)current_pcb->thread->rsp;
    // kdebug("current_pcb->thread->rsp=%#018lx", current_pcb->thread->rsp);
    current_pcb->flags = 0;
    // Switch rsp to the pt_regs frame, push thread->rip (ret_from_system_call) as the
    // return address, then jmp to do_execve, the handler of the execve system call.
    // The design is similar to switch_proc.
    // Register set-up from the constraints below: rdi = thread->rsp (the regs frame),
    // rsi = "/shell.elf", rcx = NULL, rdx = NULL.
    // Load the user-mode program: shell.elf
    // NOTE(review): init_path and addr are not referenced by the asm statement, which
    // passes the string literal directly.
    char init_path[] = "/shell.elf";
    uint64_t addr = (uint64_t)&init_path;
    __asm__ __volatile__("movq %1, %%rsp   \n\t"
                         "pushq %2    \n\t"
                         "jmp do_execve  \n\t" ::"D"(current_pcb->thread->rsp),
                         "m"(current_pcb->thread->rsp), "m"(current_pcb->thread->rip), "S"("/shell.elf"), "c"(NULL), "d"(NULL)
                         : "memory");
    return 1;
}
#pragma GCC pop_options
  450. /**
  451. * @brief 当子进程退出后向父进程发送通知
  452. *
  453. */
  454. void process_exit_notify()
  455. {
  456. wait_queue_wakeup(&current_pcb->parent_pcb->wait_child_proc_exit, PROC_INTERRUPTIBLE);
  457. }
  458. /**
  459. * @brief 进程退出时执行的函数
  460. *
  461. * @param code 返回码
  462. * @return ul
  463. */
  464. ul process_do_exit(ul code)
  465. {
  466. // kinfo("process exiting..., code is %ld.", (long)code);
  467. cli();
  468. struct process_control_block *pcb = current_pcb;
  469. // 进程退出时释放资源
  470. process_exit_files(pcb);
  471. process_exit_thread(pcb);
  472. // todo: 可否在这里释放内存结构体?(在判断共享页引用问题之后)
  473. pcb->state = PROC_ZOMBIE;
  474. pcb->exit_code = code;
  475. sti();
  476. process_exit_notify();
  477. sched();
  478. while (1)
  479. pause();
  480. }
/**
 * @brief Create a kernel process (kernel thread)
 *
 * Builds a pt_regs frame that starts execution at kernel_thread_func with the entry
 * point in rbx and its argument in rdx, then forks with CLONE_VM added to flags.
 *
 * @param fn address of the target routine
 * @param arg argument passed to the target routine
 * @param flags clone flags forwarded to do_fork()
 * @return int the value returned by do_fork()
 */
int kernel_thread(unsigned long (*fn)(unsigned long), unsigned long arg, unsigned long flags)
{
    struct pt_regs regs;
    barrier();
    memset(&regs, 0, sizeof(regs));
    barrier();
    // Save the entry address of the process in the rbx register
    regs.rbx = (ul)fn;
    // Save the argument in the rdx register
    regs.rdx = (ul)arg;
    barrier();
    regs.ds = KERNEL_DS;
    barrier();
    regs.es = KERNEL_DS;
    barrier();
    regs.cs = KERNEL_CS;
    barrier();
    regs.ss = KERNEL_DS;
    barrier();
    // Set the interrupt-enable flag (bit 9 of rflags)
    regs.rflags = (1 << 9);
    barrier();
    // rip points to the bootstrap routine of kernel threads
    regs.rip = (ul)kernel_thread_func;
    barrier();
    // kdebug("kernel_thread_func=%#018lx", kernel_thread_func);
    // kdebug("&kernel_thread_func=%#018lx", &kernel_thread_func);
    // kdebug("1111\tregs.rip = %#018lx", regs.rip);
    return do_fork(&regs, flags | CLONE_VM, 0, 0);
}
/**
 * @brief Initialize the process module
 * Precondition: the system-call module has already been initialized
 */
void process_init()
{
    kinfo("Initializing process...");
    // Describe the kernel's own address space in initial_mm
    initial_mm.pgd = (pml4t_t *)get_CR3();
    initial_mm.code_addr_start = memory_management_struct.kernel_code_start;
    initial_mm.code_addr_end = memory_management_struct.kernel_code_end;
    initial_mm.data_addr_start = (ul)&_data;
    initial_mm.data_addr_end = memory_management_struct.kernel_data_end;
    initial_mm.rodata_addr_start = (ul)&_rodata;
    initial_mm.rodata_addr_end = (ul)&_erodata;
    initial_mm.bss_start = (uint64_t)&_bss;
    initial_mm.bss_end = (uint64_t)&_ebss;
    initial_mm.brk_start = memory_management_struct.start_brk;
    initial_mm.brk_end = current_pcb->addr_limit;
    initial_mm.stack_start = _stack_start;
    initial_mm.vmas = NULL;
    initial_tss[proc_current_cpu_id].rsp0 = initial_thread.rbp;
    // ========= Map the kernel address space in the IDLE process's top-level page table =====================
    // The high half of the IDLE process's top-level page table is copied by later processes. So that
    // all processes share the same kernel space, the second-level tables must be installed in the
    // IDLE process's top-level page table first.
    uint64_t *idle_pml4t_vaddr = (uint64_t *)phys_2_virt((uint64_t)get_CR3() & (~0xfffUL));
    for (int i = 256; i < 512; ++i)
    {
        uint64_t *tmp = idle_pml4t_vaddr + i;
        barrier();
        // Install a zeroed table for every kernel-half entry that is still empty
        if (*tmp == 0)
        {
            void *pdpt = kmalloc(PAGE_4K_SIZE, 0);
            barrier();
            memset(pdpt, 0, PAGE_4K_SIZE);
            barrier();
            set_pml4t(tmp, mk_pml4t(virt_2_phys(pdpt), PAGE_KERNEL_PGT));
        }
    }
    barrier();
    flush_tlb();
    /*
    kdebug("initial_thread.rbp=%#018lx", initial_thread.rbp);
    kdebug("initial_tss[0].rsp1=%#018lx", initial_tss[0].rsp1);
    kdebug("initial_tss[0].ist1=%#018lx", initial_tss[0].ist1);
    */
    // Initialize the pid write lock
    spin_init(&process_global_pid_write_lock);
    // Initialize the circular list of processes
    list_init(&initial_proc_union.pcb.list);
    barrier();
    kernel_thread(initial_kernel_thread, 10, CLONE_FS | CLONE_SIGNAL); // Create the init kernel thread
    barrier();
    initial_proc_union.pcb.state = PROC_RUNNING;
    initial_proc_union.pcb.preempt_count = 0;
    initial_proc_union.pcb.cpu_id = 0;
    initial_proc_union.pcb.virtual_runtime = (1UL << 60);
    current_pcb->virtual_runtime = (1UL << 60);
}
  577. /**
  578. * @brief fork当前进程
  579. *
  580. * @param regs 新的寄存器值
  581. * @param clone_flags 克隆标志
  582. * @param stack_start 堆栈开始地址
  583. * @param stack_size 堆栈大小
  584. * @return unsigned long
  585. */
  586. unsigned long do_fork(struct pt_regs *regs, unsigned long clone_flags, unsigned long stack_start, unsigned long stack_size)
  587. {
  588. int retval = 0;
  589. struct process_control_block *tsk = NULL;
  590. // 为新的进程分配栈空间,并将pcb放置在底部
  591. tsk = (struct process_control_block *)kmalloc(STACK_SIZE, 0);
  592. barrier();
  593. if (tsk == NULL)
  594. {
  595. retval = -ENOMEM;
  596. return retval;
  597. }
  598. barrier();
  599. memset(tsk, 0, sizeof(struct process_control_block));
  600. io_mfence();
  601. // 将当前进程的pcb复制到新的pcb内
  602. memcpy(tsk, current_pcb, sizeof(struct process_control_block));
  603. io_mfence();
  604. // 初始化进程的循环链表结点
  605. list_init(&tsk->list);
  606. io_mfence();
  607. // 判断是否为内核态调用fork
  608. if (current_pcb->flags & PF_KTHREAD && stack_start != 0)
  609. tsk->flags |= PF_KFORK;
  610. tsk->priority = 2;
  611. tsk->preempt_count = 0;
  612. // 增加全局的pid并赋值给新进程的pid
  613. spin_lock(&process_global_pid_write_lock);
  614. tsk->pid = process_global_pid++;
  615. barrier();
  616. // 加入到进程链表中
  617. tsk->next_pcb = initial_proc_union.pcb.next_pcb;
  618. barrier();
  619. initial_proc_union.pcb.next_pcb = tsk;
  620. barrier();
  621. tsk->parent_pcb = current_pcb;
  622. barrier();
  623. spin_unlock(&process_global_pid_write_lock);
  624. tsk->cpu_id = proc_current_cpu_id;
  625. tsk->state = PROC_UNINTERRUPTIBLE;
  626. tsk->parent_pcb = current_pcb;
  627. wait_queue_init(&tsk->wait_child_proc_exit, NULL);
  628. barrier();
  629. list_init(&tsk->list);
  630. retval = -ENOMEM;
  631. // 拷贝标志位
  632. if (process_copy_flags(clone_flags, tsk))
  633. goto copy_flags_failed;
  634. // 拷贝内存空间分布结构体
  635. if (process_copy_mm(clone_flags, tsk))
  636. goto copy_mm_failed;
  637. // 拷贝文件
  638. if (process_copy_files(clone_flags, tsk))
  639. goto copy_files_failed;
  640. // 拷贝线程结构体
  641. if (process_copy_thread(clone_flags, tsk, stack_start, stack_size, regs))
  642. goto copy_thread_failed;
  643. // 拷贝成功
  644. retval = tsk->pid;
  645. tsk->flags &= ~PF_KFORK;
  646. // 唤醒进程
  647. process_wakeup(tsk);
  648. return retval;
  649. copy_thread_failed:;
  650. // 回收线程
  651. process_exit_thread(tsk);
  652. copy_files_failed:;
  653. // 回收文件
  654. process_exit_files(tsk);
  655. copy_mm_failed:;
  656. // 回收内存空间分布结构体
  657. process_exit_mm(tsk);
  658. copy_flags_failed:;
  659. kfree(tsk);
  660. return retval;
  661. return 0;
  662. }
  663. /**
  664. * @brief 根据pid获取进程的pcb
  665. *
  666. * @param pid
  667. * @return struct process_control_block*
  668. */
  669. struct process_control_block *process_get_pcb(long pid)
  670. {
  671. struct process_control_block *pcb = initial_proc_union.pcb.next_pcb;
  672. // 使用蛮力法搜索指定pid的pcb
  673. // todo: 使用哈希表来管理pcb
  674. for (; pcb != &initial_proc_union.pcb; pcb = pcb->next_pcb)
  675. {
  676. if (pcb->pid == pid)
  677. return pcb;
  678. }
  679. return NULL;
  680. }
/**
 * @brief Add a process to the scheduler's ready queue
 *
 * Marks the process as PROC_RUNNING and hands it to sched_enqueue().
 *
 * @param pcb pcb of the process
 */
void process_wakeup(struct process_control_block *pcb)
{
    pcb->state = PROC_RUNNING;
    sched_enqueue(pcb);
}
  691. /**
  692. * @brief 将进程加入到调度器的就绪队列中,并标志当前进程需要被调度
  693. *
  694. * @param pcb 进程的pcb
  695. */
  696. void process_wakeup_immediately(struct process_control_block *pcb)
  697. {
  698. pcb->state = PROC_RUNNING;
  699. sched_enqueue(pcb);
  700. // 将当前进程标志为需要调度,缩短新进程被wakeup的时间
  701. current_pcb->flags |= PF_NEED_SCHED;
  702. }
  703. /**
  704. * @brief 拷贝当前进程的标志位
  705. *
  706. * @param clone_flags 克隆标志位
  707. * @param pcb 新的进程的pcb
  708. * @return uint64_t
  709. */
  710. uint64_t process_copy_flags(uint64_t clone_flags, struct process_control_block *pcb)
  711. {
  712. if (clone_flags & CLONE_VM)
  713. pcb->flags |= PF_VFORK;
  714. return 0;
  715. }
  716. /**
  717. * @brief 拷贝当前进程的文件描述符等信息
  718. *
  719. * @param clone_flags 克隆标志位
  720. * @param pcb 新的进程的pcb
  721. * @return uint64_t
  722. */
  723. uint64_t process_copy_files(uint64_t clone_flags, struct process_control_block *pcb)
  724. {
  725. int retval = 0;
  726. // 如果CLONE_FS被置位,那么子进程与父进程共享文件描述符
  727. // 文件描述符已经在复制pcb时被拷贝
  728. if (clone_flags & CLONE_FS)
  729. return retval;
  730. // 为新进程拷贝新的文件描述符
  731. for (int i = 0; i < PROC_MAX_FD_NUM; ++i)
  732. {
  733. if (current_pcb->fds[i] == NULL)
  734. continue;
  735. pcb->fds[i] = (struct vfs_file_t *)kmalloc(sizeof(struct vfs_file_t), 0);
  736. memcpy(pcb->fds[i], current_pcb->fds[i], sizeof(struct vfs_file_t));
  737. }
  738. return retval;
  739. }
  740. /**
  741. * @brief 回收进程的所有文件描述符
  742. *
  743. * @param pcb 要被回收的进程的pcb
  744. * @return uint64_t
  745. */
  746. uint64_t process_exit_files(struct process_control_block *pcb)
  747. {
  748. // 不与父进程共享文件描述符
  749. if (!(pcb->flags & PF_VFORK))
  750. {
  751. for (int i = 0; i < PROC_MAX_FD_NUM; ++i)
  752. {
  753. if (pcb->fds[i] == NULL)
  754. continue;
  755. kfree(pcb->fds[i]);
  756. }
  757. }
  758. // 清空当前进程的文件描述符列表
  759. memset(pcb->fds, 0, sizeof(struct vfs_file_t *) * PROC_MAX_FD_NUM);
  760. }
  761. /**
  762. * @brief 拷贝当前进程的内存空间分布结构体信息
  763. *
  764. * @param clone_flags 克隆标志位
  765. * @param pcb 新的进程的pcb
  766. * @return uint64_t
  767. */
  768. uint64_t process_copy_mm(uint64_t clone_flags, struct process_control_block *pcb)
  769. {
  770. int retval = 0;
  771. // 与父进程共享内存空间
  772. if (clone_flags & CLONE_VM)
  773. {
  774. pcb->mm = current_pcb->mm;
  775. return retval;
  776. }
  777. // 分配新的内存空间分布结构体
  778. struct mm_struct *new_mms = (struct mm_struct *)kmalloc(sizeof(struct mm_struct), 0);
  779. memset(new_mms, 0, sizeof(struct mm_struct));
  780. memcpy(new_mms, current_pcb->mm, sizeof(struct mm_struct));
  781. new_mms->vmas = NULL;
  782. pcb->mm = new_mms;
  783. // 分配顶层页表, 并设置顶层页表的物理地址
  784. new_mms->pgd = (pml4t_t *)virt_2_phys(kmalloc(PAGE_4K_SIZE, 0));
  785. // 由于高2K部分为内核空间,在接下来需要覆盖其数据,因此不用清零
  786. memset(phys_2_virt(new_mms->pgd), 0, PAGE_4K_SIZE / 2);
  787. // 拷贝内核空间的页表指针
  788. memcpy(phys_2_virt(new_mms->pgd) + 256, phys_2_virt(initial_proc[proc_current_cpu_id]->mm->pgd) + 256, PAGE_4K_SIZE / 2);
  789. uint64_t *current_pgd = (uint64_t *)phys_2_virt(current_pcb->mm->pgd);
  790. uint64_t *new_pml4t = (uint64_t *)phys_2_virt(new_mms->pgd);
  791. // 拷贝用户空间的vma
  792. struct vm_area_struct *vma = current_pcb->mm->vmas;
  793. while (vma != NULL)
  794. {
  795. if (vma->vm_end > USER_MAX_LINEAR_ADDR || vma->vm_flags & VM_DONTCOPY)
  796. {
  797. vma = vma->vm_next;
  798. continue;
  799. }
  800. int64_t vma_size = vma->vm_end - vma->vm_start;
  801. // kdebug("vma_size=%ld, vm_start=%#018lx", vma_size, vma->vm_start);
  802. if (vma_size > PAGE_2M_SIZE / 2)
  803. {
  804. int page_to_alloc = (PAGE_2M_ALIGN(vma_size)) >> PAGE_2M_SHIFT;
  805. for (int i = 0; i < page_to_alloc; ++i)
  806. {
  807. uint64_t pa = alloc_pages(ZONE_NORMAL, 1, PAGE_PGT_MAPPED)->addr_phys;
  808. struct vm_area_struct *new_vma = NULL;
  809. int ret = mm_create_vma(new_mms, vma->vm_start + i * PAGE_2M_SIZE, PAGE_2M_SIZE, vma->vm_flags, vma->vm_ops, &new_vma);
  810. // 防止内存泄露
  811. if (unlikely(ret == -EEXIST))
  812. free_pages(Phy_to_2M_Page(pa), 1);
  813. else
  814. mm_map_vma(new_vma, pa);
  815. memcpy((void *)phys_2_virt(pa), (void *)(vma->vm_start + i * PAGE_2M_SIZE), (vma_size >= PAGE_2M_SIZE) ? PAGE_2M_SIZE : vma_size);
  816. vma_size -= PAGE_2M_SIZE;
  817. }
  818. }
  819. else
  820. {
  821. uint64_t map_size = PAGE_4K_ALIGN(vma_size);
  822. uint64_t va = (uint64_t)kmalloc(map_size, 0);
  823. struct vm_area_struct *new_vma = NULL;
  824. int ret = mm_create_vma(new_mms, vma->vm_start, map_size, vma->vm_flags, vma->vm_ops, &new_vma);
  825. // 防止内存泄露
  826. if (unlikely(ret == -EEXIST))
  827. kfree((void *)va);
  828. else
  829. mm_map_vma(new_vma, virt_2_phys(va));
  830. memcpy((void *)va, (void *)vma->vm_start, vma_size);
  831. }
  832. vma = vma->vm_next;
  833. }
  834. return retval;
  835. }
  836. /**
  837. * @brief 释放进程的页表
  838. *
  839. * @param pcb 要被释放页表的进程
  840. * @return uint64_t
  841. */
  842. uint64_t process_exit_mm(struct process_control_block *pcb)
  843. {
  844. if (pcb->flags & CLONE_VM)
  845. return 0;
  846. if (pcb->mm == NULL)
  847. {
  848. kdebug("pcb->mm==NULL");
  849. return 0;
  850. }
  851. if (pcb->mm->pgd == NULL)
  852. {
  853. kdebug("pcb->mm->pgd==NULL");
  854. return 0;
  855. }
  856. // // 获取顶层页表
  857. pml4t_t *current_pgd = (pml4t_t *)phys_2_virt(pcb->mm->pgd);
  858. // 循环释放VMA中的内存
  859. struct vm_area_struct *vma = pcb->mm->vmas;
  860. while (vma != NULL)
  861. {
  862. struct vm_area_struct *cur_vma = vma;
  863. vma = cur_vma->vm_next;
  864. uint64_t pa;
  865. // kdebug("vm start=%#018lx, sem=%d", cur_vma->vm_start, cur_vma->anon_vma->sem.counter);
  866. mm_unmap_vma(pcb->mm, cur_vma, &pa);
  867. uint64_t size = (cur_vma->vm_end - cur_vma->vm_start);
  868. // 释放内存
  869. switch (size)
  870. {
  871. case PAGE_4K_SIZE:
  872. kfree(phys_2_virt(pa));
  873. break;
  874. default:
  875. break;
  876. }
  877. vm_area_del(cur_vma);
  878. vm_area_free(cur_vma);
  879. }
  880. // 释放顶层页表
  881. kfree(current_pgd);
  882. if (unlikely(pcb->mm->vmas != NULL))
  883. {
  884. kwarn("pcb.mm.vmas!=NULL");
  885. }
  886. // 释放内存空间分布结构体
  887. kfree(pcb->mm);
  888. return 0;
  889. }
  890. /**
  891. * @brief 重写内核栈中的rbp地址
  892. *
  893. * @param new_regs 子进程的reg
  894. * @param new_pcb 子进程的pcb
  895. * @return int
  896. */
  897. static int process_rewrite_rbp(struct pt_regs *new_regs, struct process_control_block *new_pcb)
  898. {
  899. uint64_t new_top = ((uint64_t)new_pcb) + STACK_SIZE;
  900. uint64_t old_top = (uint64_t)(current_pcb) + STACK_SIZE;
  901. uint64_t *rbp = &new_regs->rbp;
  902. uint64_t *tmp = rbp;
  903. // 超出内核栈范围
  904. if ((uint64_t)*rbp >= old_top || (uint64_t)*rbp < (old_top - STACK_SIZE))
  905. return 0;
  906. while (1)
  907. {
  908. // 计算delta
  909. uint64_t delta = old_top - *rbp;
  910. // 计算新的rbp值
  911. uint64_t newVal = new_top - delta;
  912. // 新的值不合法
  913. if (unlikely((uint64_t)newVal >= new_top || (uint64_t)newVal < (new_top - STACK_SIZE)))
  914. break;
  915. // 将新的值写入对应位置
  916. *rbp = newVal;
  917. // 跳转栈帧
  918. rbp = (uint64_t *)*rbp;
  919. }
  920. // 设置内核态fork返回到enter_syscall_int()函数内的时候,rsp寄存器的值
  921. new_regs->rsp = new_top - (old_top - new_regs->rsp);
  922. return 0;
  923. }
  924. /**
  925. * @brief 拷贝当前进程的线程结构体
  926. *
  927. * @param clone_flags 克隆标志位
  928. * @param pcb 新的进程的pcb
  929. * @return uint64_t
  930. */
  931. uint64_t process_copy_thread(uint64_t clone_flags, struct process_control_block *pcb, uint64_t stack_start, uint64_t stack_size, struct pt_regs *current_regs)
  932. {
  933. // 将线程结构体放置在pcb后方
  934. struct thread_struct *thd = (struct thread_struct *)(pcb + 1);
  935. memset(thd, 0, sizeof(struct thread_struct));
  936. pcb->thread = thd;
  937. struct pt_regs *child_regs = NULL;
  938. // 拷贝栈空间
  939. if (pcb->flags & PF_KFORK) // 内核态下的fork
  940. {
  941. // 内核态下则拷贝整个内核栈
  942. uint32_t size = ((uint64_t)current_pcb) + STACK_SIZE - (uint64_t)(current_regs);
  943. child_regs = (struct pt_regs *)(((uint64_t)pcb) + STACK_SIZE - size);
  944. memcpy(child_regs, (void *)current_regs, size);
  945. barrier();
  946. // 然后重写新的栈中,每个栈帧的rbp值
  947. process_rewrite_rbp(child_regs, pcb);
  948. }
  949. else
  950. {
  951. child_regs = (struct pt_regs *)((uint64_t)pcb + STACK_SIZE - sizeof(struct pt_regs));
  952. memcpy(child_regs, current_regs, sizeof(struct pt_regs));
  953. barrier();
  954. child_regs->rsp = stack_start;
  955. }
  956. // 设置子进程的返回值为0
  957. child_regs->rax = 0;
  958. if (pcb->flags & PF_KFORK)
  959. thd->rbp = (uint64_t)(child_regs + 1); // 设置新的内核线程开始执行时的rbp(也就是进入ret_from_system_call时的rbp)
  960. else
  961. thd->rbp = (uint64_t)pcb + STACK_SIZE;
  962. // 设置新的内核线程开始执行的时候的rsp
  963. thd->rsp = (uint64_t)child_regs;
  964. thd->fs = current_pcb->thread->fs;
  965. thd->gs = current_pcb->thread->gs;
  966. // 根据是否为内核线程、是否在内核态fork,设置进程的开始执行的地址
  967. if (pcb->flags & PF_KFORK)
  968. thd->rip = (uint64_t)ret_from_system_call;
  969. else if (pcb->flags & PF_KTHREAD && (!(pcb->flags & PF_KFORK)))
  970. thd->rip = (uint64_t)kernel_thread_func;
  971. else
  972. thd->rip = (uint64_t)ret_from_system_call;
  973. return 0;
  974. }
  975. /**
  976. * @brief todo: 回收线程结构体
  977. *
  978. * @param pcb
  979. */
  980. void process_exit_thread(struct process_control_block *pcb)
  981. {
  982. }
  983. /**
  984. * @brief 申请可用的文件句柄
  985. *
  986. * @return int
  987. */
  988. int process_fd_alloc(struct vfs_file_t *file)
  989. {
  990. int fd_num = -1;
  991. struct vfs_file_t **f = current_pcb->fds;
  992. for (int i = 0; i < PROC_MAX_FD_NUM; ++i)
  993. {
  994. /* 找到指针数组中的空位 */
  995. if (f[i] == NULL)
  996. {
  997. fd_num = i;
  998. f[i] = file;
  999. break;
  1000. }
  1001. }
  1002. return fd_num;
  1003. }