fork.rs 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536
  1. use alloc::vec::Vec;
  2. use core::{intrinsics::unlikely, sync::atomic::Ordering};
  3. use alloc::{string::ToString, sync::Arc};
  4. use log::error;
  5. use system_error::SystemError;
  6. use crate::{
  7. arch::{interrupt::TrapFrame, ipc::signal::Signal},
  8. filesystem::procfs::procfs_register_pid,
  9. ipc::signal::flush_signal_handlers,
  10. libs::rwlock::RwLock,
  11. mm::VirtAddr,
  12. namespaces::{create_new_namespaces, namespace::USER_NS, pid_namespace::PidStrcut},
  13. process::ProcessFlags,
  14. sched::{sched_cgroup_fork, sched_fork},
  15. smp::core::smp_get_processor_id,
  16. syscall::user_access::UserBufferWriter,
  17. };
  18. use super::{
  19. kthread::{KernelThreadPcbPrivate, WorkerPrivate},
  20. KernelStack, Pid, ProcessControlBlock, ProcessManager,
  21. };
  /// Initial capacity reserved for `KernelCloneArgs::set_tid`.
  ///
  /// NOTE(review): presumably the maximum nesting depth of PID namespaces
  /// (one `set_tid` entry per level) — confirm against the PID-namespace code.
  const MAX_PID_NS_LEVEL: usize = 32;
  23. bitflags! {
  24. /// 进程克隆标志
  25. pub struct CloneFlags: u64 {
  26. /// 在进程间共享虚拟内存空间
  27. const CLONE_VM = 0x00000100;
  28. /// 在进程间共享文件系统信息
  29. const CLONE_FS = 0x00000200;
  30. /// 共享打开的文件
  31. const CLONE_FILES = 0x00000400;
  32. /// 克隆时,与父进程共享信号处理结构体
  33. const CLONE_SIGHAND = 0x00000800;
  34. /// 返回进程的文件描述符
  35. const CLONE_PIDFD = 0x00001000;
  36. /// 使克隆对象成为父进程的跟踪对象
  37. const CLONE_PTRACE = 0x00002000;
  38. /// 在执行 exec() 或 _exit() 之前挂起父进程的执行
  39. const CLONE_VFORK = 0x00004000;
  40. /// 使克隆对象的父进程为调用进程的父进程
  41. const CLONE_PARENT = 0x00008000;
  42. /// 拷贝线程
  43. const CLONE_THREAD = 0x00010000;
  44. /// 创建一个新的命名空间,其中包含独立的文件系统挂载点层次结构。
  45. const CLONE_NEWNS = 0x00020000;
  46. /// 与父进程共享 System V 信号量。
  47. const CLONE_SYSVSEM = 0x00040000;
  48. /// 设置其线程本地存储
  49. const CLONE_SETTLS = 0x00080000;
  50. /// 设置partent_tid地址为子进程线程 ID
  51. const CLONE_PARENT_SETTID = 0x00100000;
  52. /// 在子进程中设置一个清除线程 ID 的用户空间地址
  53. const CLONE_CHILD_CLEARTID = 0x00200000;
  54. /// 创建一个新线程,将其设置为分离状态
  55. const CLONE_DETACHED = 0x00400000;
  56. /// 使其在创建者进程或线程视角下成为无法跟踪的。
  57. const CLONE_UNTRACED = 0x00800000;
  58. /// 设置其子进程线程 ID
  59. const CLONE_CHILD_SETTID = 0x01000000;
  60. /// 将其放置在一个新的 cgroup 命名空间中
  61. const CLONE_NEWCGROUP = 0x02000000;
  62. /// 将其放置在一个新的 UTS 命名空间中
  63. const CLONE_NEWUTS = 0x04000000;
  64. /// 将其放置在一个新的 IPC 命名空间中
  65. const CLONE_NEWIPC = 0x08000000;
  66. /// 将其放置在一个新的用户命名空间中
  67. const CLONE_NEWUSER = 0x10000000;
  68. /// 将其放置在一个新的 PID 命名空间中
  69. const CLONE_NEWPID = 0x20000000;
  70. /// 将其放置在一个新的网络命名空间中
  71. const CLONE_NEWNET = 0x40000000;
  72. /// 在新的 I/O 上下文中运行它
  73. const CLONE_IO = 0x80000000;
  74. /// 克隆时,与父进程共享信号结构体
  75. const CLONE_SIGNAL = 0x00010000 | 0x00000800;
  76. /// 克隆时,将原本被设置为SIG_IGNORE的信号,设置回SIG_DEFAULT
  77. const CLONE_CLEAR_SIGHAND = 0x100000000;
  78. }
  79. }
  80. /// ## clone与clone3系统调用的参数载体
  81. ///
  82. /// 因为这两个系统调用的参数很多,所以有这样一个载体更灵活
  83. ///
  84. /// 仅仅作为参数传递
  85. #[allow(dead_code)]
  86. #[derive(Debug, Clone)]
  87. pub struct KernelCloneArgs {
  88. pub flags: CloneFlags,
  89. // 下列属性均来自用户空间
  90. pub pidfd: VirtAddr,
  91. pub child_tid: VirtAddr,
  92. pub parent_tid: VirtAddr,
  93. pub set_tid: Vec<usize>,
  94. /// 进程退出时发送的信号
  95. pub exit_signal: Signal,
  96. pub stack: usize,
  97. // clone3用到
  98. pub stack_size: usize,
  99. pub tls: usize,
  100. pub set_tid_size: usize,
  101. pub cgroup: i32,
  102. pub io_thread: bool,
  103. pub kthread: bool,
  104. pub idle: bool,
  105. pub func: VirtAddr,
  106. pub fn_arg: VirtAddr,
  107. // cgrp 和 cset?
  108. }
  109. impl KernelCloneArgs {
  110. pub fn new() -> Self {
  111. let null_addr = VirtAddr::new(0);
  112. Self {
  113. flags: unsafe { CloneFlags::from_bits_unchecked(0) },
  114. pidfd: null_addr,
  115. child_tid: null_addr,
  116. parent_tid: null_addr,
  117. set_tid: Vec::with_capacity(MAX_PID_NS_LEVEL),
  118. exit_signal: Signal::SIGCHLD,
  119. stack: 0,
  120. stack_size: 0,
  121. tls: 0,
  122. set_tid_size: 0,
  123. cgroup: 0,
  124. io_thread: false,
  125. kthread: false,
  126. idle: false,
  127. func: null_addr,
  128. fn_arg: null_addr,
  129. }
  130. }
  131. }
  132. impl ProcessManager {
  133. /// 创建一个新进程
  134. ///
  135. /// ## 参数
  136. ///
  137. /// - `current_trapframe`: 当前进程的trapframe
  138. /// - `clone_flags`: 进程克隆标志
  139. ///
  140. /// ## 返回值
  141. ///
  142. /// - 成功:返回新进程的pid
  143. /// - 失败:返回Err(SystemError),fork失败的话,子线程不会执行。
  144. ///
  145. /// ## Safety
  146. ///
  147. /// - fork失败的话,子线程不会执行。
  148. pub fn fork(
  149. current_trapframe: &TrapFrame,
  150. clone_flags: CloneFlags,
  151. ) -> Result<Pid, SystemError> {
  152. let current_pcb = ProcessManager::current_pcb();
  153. let new_kstack: KernelStack = KernelStack::new()?;
  154. let name = current_pcb.basic().name().to_string();
  155. let pcb = ProcessControlBlock::new(name, new_kstack);
  156. let mut args = KernelCloneArgs::new();
  157. args.flags = clone_flags;
  158. args.exit_signal = Signal::SIGCHLD;
  159. Self::copy_process(&current_pcb, &pcb, args, current_trapframe).map_err(|e| {
  160. error!(
  161. "fork: Failed to copy process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
  162. current_pcb.pid(),
  163. pcb.pid(),
  164. e
  165. );
  166. e
  167. })?;
  168. ProcessManager::add_pcb(pcb.clone());
  169. // 向procfs注册进程
  170. procfs_register_pid(pcb.pid()).unwrap_or_else(|e| {
  171. panic!(
  172. "fork: Failed to register pid to procfs, pid: [{:?}]. Error: {:?}",
  173. pcb.pid(),
  174. e
  175. )
  176. });
  177. pcb.sched_info().set_on_cpu(Some(smp_get_processor_id()));
  178. ProcessManager::wakeup(&pcb).unwrap_or_else(|e| {
  179. panic!(
  180. "fork: Failed to wakeup new process, pid: [{:?}]. Error: {:?}",
  181. pcb.pid(),
  182. e
  183. )
  184. });
  185. return Ok(pcb.pid());
  186. }
  187. fn copy_flags(
  188. clone_flags: &CloneFlags,
  189. new_pcb: &Arc<ProcessControlBlock>,
  190. ) -> Result<(), SystemError> {
  191. if clone_flags.contains(CloneFlags::CLONE_VM) {
  192. new_pcb.flags().insert(ProcessFlags::VFORK);
  193. }
  194. *new_pcb.flags.get_mut() = *ProcessManager::current_pcb().flags();
  195. return Ok(());
  196. }
  197. /// 拷贝进程的地址空间
  198. ///
  199. /// ## 参数
  200. ///
  201. /// - `clone_vm`: 是否与父进程共享地址空间。true表示共享
  202. /// - `new_pcb`: 新进程的pcb
  203. ///
  204. /// ## 返回值
  205. ///
  206. /// - 成功:返回Ok(())
  207. /// - 失败:返回Err(SystemError)
  208. ///
  209. /// ## Panic
  210. ///
  211. /// - 如果当前进程没有用户地址空间,则panic
  212. #[inline(never)]
  213. fn copy_mm(
  214. clone_flags: &CloneFlags,
  215. current_pcb: &Arc<ProcessControlBlock>,
  216. new_pcb: &Arc<ProcessControlBlock>,
  217. ) -> Result<(), SystemError> {
  218. let old_address_space = current_pcb.basic().user_vm().unwrap_or_else(|| {
  219. panic!(
  220. "copy_mm: Failed to get address space of current process, current pid: [{:?}]",
  221. current_pcb.pid()
  222. )
  223. });
  224. if clone_flags.contains(CloneFlags::CLONE_VM) {
  225. unsafe { new_pcb.basic_mut().set_user_vm(Some(old_address_space)) };
  226. return Ok(());
  227. }
  228. let new_address_space = old_address_space.write_irqsave().try_clone().unwrap_or_else(|e| {
  229. panic!(
  230. "copy_mm: Failed to clone address space of current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
  231. current_pcb.pid(), new_pcb.pid(), e
  232. )
  233. });
  234. unsafe { new_pcb.basic_mut().set_user_vm(Some(new_address_space)) };
  235. return Ok(());
  236. }
  237. #[inline(never)]
  238. fn copy_namespaces(
  239. clone_flags: &CloneFlags,
  240. current_pcb: &Arc<ProcessControlBlock>,
  241. new_pcb: &Arc<ProcessControlBlock>,
  242. ) -> Result<(), SystemError> {
  243. if !clone_flags.contains(CloneFlags::CLONE_NEWNS)
  244. && !clone_flags.contains(CloneFlags::CLONE_NEWUTS)
  245. && !clone_flags.contains(CloneFlags::CLONE_NEWIPC)
  246. && !clone_flags.contains(CloneFlags::CLONE_NEWPID)
  247. && !clone_flags.contains(CloneFlags::CLONE_NEWNET)
  248. && !clone_flags.contains(CloneFlags::CLONE_NEWCGROUP)
  249. {
  250. new_pcb.set_nsproxy(current_pcb.get_nsproxy().read().clone());
  251. return Ok(());
  252. }
  253. if clone_flags.contains(CloneFlags::CLONE_NEWIPC)
  254. && clone_flags.contains(CloneFlags::CLONE_SYSVSEM)
  255. {
  256. return Err(SystemError::EINVAL);
  257. }
  258. let new_nsproxy = create_new_namespaces(clone_flags.bits(), current_pcb, USER_NS.clone())?;
  259. *new_pcb.nsproxy.write() = new_nsproxy;
  260. Ok(())
  261. }
  262. #[inline(never)]
  263. fn copy_files(
  264. clone_flags: &CloneFlags,
  265. current_pcb: &Arc<ProcessControlBlock>,
  266. new_pcb: &Arc<ProcessControlBlock>,
  267. ) -> Result<(), SystemError> {
  268. // 如果不共享文件描述符表,则拷贝文件描述符表
  269. if !clone_flags.contains(CloneFlags::CLONE_FILES) {
  270. let new_fd_table = current_pcb.basic().fd_table().unwrap().read().clone();
  271. let new_fd_table = Arc::new(RwLock::new(new_fd_table));
  272. new_pcb.basic_mut().set_fd_table(Some(new_fd_table));
  273. } else {
  274. // 如果共享文件描述符表,则直接拷贝指针
  275. new_pcb
  276. .basic_mut()
  277. .set_fd_table(current_pcb.basic().fd_table().clone());
  278. }
  279. return Ok(());
  280. }
  281. #[allow(dead_code)]
  282. fn copy_sighand(
  283. clone_flags: &CloneFlags,
  284. current_pcb: &Arc<ProcessControlBlock>,
  285. new_pcb: &Arc<ProcessControlBlock>,
  286. ) -> Result<(), SystemError> {
  287. // // 将信号的处理函数设置为default(除了那些被手动屏蔽的)
  288. if clone_flags.contains(CloneFlags::CLONE_CLEAR_SIGHAND) {
  289. flush_signal_handlers(new_pcb.clone(), false);
  290. }
  291. if clone_flags.contains(CloneFlags::CLONE_SIGHAND) {
  292. new_pcb.sig_struct_irqsave().handlers = current_pcb.sig_struct_irqsave().handlers;
  293. }
  294. return Ok(());
  295. }
  296. /// 拷贝进程信息
  297. ///
  298. /// ## panic:
  299. /// 某一步拷贝失败时会引发panic
  300. /// 例如:copy_mm等失败时会触发panic
  301. ///
  302. /// ## 参数
  303. ///
  304. /// - clone_flags 标志位
  305. /// - current_pcb 拷贝源pcb
  306. /// - pcb 目标pcb
  307. ///
  308. /// ## return
  309. /// - 发生错误时返回Err(SystemError)
  310. #[inline(never)]
  311. pub fn copy_process(
  312. current_pcb: &Arc<ProcessControlBlock>,
  313. pcb: &Arc<ProcessControlBlock>,
  314. clone_args: KernelCloneArgs,
  315. current_trapframe: &TrapFrame,
  316. ) -> Result<(), SystemError> {
  317. let clone_flags = clone_args.flags;
  318. // 不允许与不同namespace的进程共享根目录
  319. if (clone_flags == (CloneFlags::CLONE_NEWNS | CloneFlags::CLONE_FS))
  320. || clone_flags == (CloneFlags::CLONE_NEWUSER | CloneFlags::CLONE_FS)
  321. {
  322. return Err(SystemError::EINVAL);
  323. }
  324. // 线程组必须共享信号,分离线程只能在线程组内启动。
  325. if clone_flags.contains(CloneFlags::CLONE_THREAD)
  326. && !clone_flags.contains(CloneFlags::CLONE_SIGHAND)
  327. {
  328. return Err(SystemError::EINVAL);
  329. }
  330. // 共享信号处理器意味着共享vm。
  331. // 线程组也意味着共享vm。阻止这种情况可以简化其他代码。
  332. if clone_flags.contains(CloneFlags::CLONE_SIGHAND)
  333. && !clone_flags.contains(CloneFlags::CLONE_VM)
  334. {
  335. return Err(SystemError::EINVAL);
  336. }
  337. // TODO: 处理CLONE_PARENT 与 SIGNAL_UNKILLABLE的情况
  338. // 如果新进程使用不同的 pid 或 namespace,
  339. // 则不允许它与分叉任务共享线程组。
  340. if clone_flags.contains(CloneFlags::CLONE_THREAD)
  341. && clone_flags.contains(CloneFlags::CLONE_NEWUSER | CloneFlags::CLONE_NEWPID)
  342. {
  343. return Err(SystemError::EINVAL);
  344. // TODO: 判断新进程与当前进程namespace是否相同,不同则返回错误
  345. }
  346. // 如果新进程将处于不同的time namespace,
  347. // 则不能让它共享vm或线程组。
  348. if clone_flags.contains(CloneFlags::CLONE_THREAD | CloneFlags::CLONE_VM) {
  349. // TODO: 判断time namespace,不同则返回错误
  350. }
  351. if clone_flags.contains(CloneFlags::CLONE_PIDFD)
  352. && clone_flags.contains(CloneFlags::CLONE_DETACHED | CloneFlags::CLONE_THREAD)
  353. {
  354. return Err(SystemError::EINVAL);
  355. }
  356. // TODO: 克隆前应该锁信号处理,等待克隆完成后再处理
  357. // 克隆架构相关
  358. let guard = current_pcb.arch_info_irqsave();
  359. unsafe { pcb.arch_info().clone_from(&guard) };
  360. drop(guard);
  361. // 为内核线程设置WorkerPrivate
  362. if current_pcb.flags().contains(ProcessFlags::KTHREAD) {
  363. *pcb.worker_private() =
  364. Some(WorkerPrivate::KernelThread(KernelThreadPcbPrivate::new()));
  365. }
  366. // 设置clear_child_tid,在线程结束时将其置0以通知父进程
  367. if clone_flags.contains(CloneFlags::CLONE_CHILD_CLEARTID) {
  368. pcb.thread.write_irqsave().clear_child_tid = Some(clone_args.child_tid);
  369. }
  370. // 设置child_tid,意味着子线程能够知道自己的id
  371. if clone_flags.contains(CloneFlags::CLONE_CHILD_SETTID) {
  372. pcb.thread.write_irqsave().set_child_tid = Some(clone_args.child_tid);
  373. }
  374. // 将子进程/线程的id存储在用户态传进的地址中
  375. if clone_flags.contains(CloneFlags::CLONE_PARENT_SETTID) {
  376. let mut writer = UserBufferWriter::new(
  377. clone_args.parent_tid.data() as *mut i32,
  378. core::mem::size_of::<i32>(),
  379. true,
  380. )?;
  381. writer.copy_one_to_user(&(pcb.pid().0 as i32), 0)?;
  382. }
  383. sched_fork(pcb).unwrap_or_else(|e| {
  384. panic!(
  385. "fork: Failed to set sched info from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
  386. current_pcb.pid(), pcb.pid(), e
  387. )
  388. });
  389. // 拷贝标志位
  390. Self::copy_flags(&clone_flags, pcb).unwrap_or_else(|e| {
  391. panic!(
  392. "fork: Failed to copy flags from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
  393. current_pcb.pid(), pcb.pid(), e
  394. )
  395. });
  396. // 拷贝用户地址空间
  397. Self::copy_mm(&clone_flags, current_pcb, pcb).unwrap_or_else(|e| {
  398. panic!(
  399. "fork: Failed to copy mm from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
  400. current_pcb.pid(), pcb.pid(), e
  401. )
  402. });
  403. Self::copy_namespaces(&clone_flags, current_pcb, pcb).unwrap_or_else(|e|{
  404. panic!("fork: Failed to copy namespace form current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
  405. current_pcb.pid(), pcb.pid(), e)
  406. });
  407. // 拷贝文件描述符表
  408. Self::copy_files(&clone_flags, current_pcb, pcb).unwrap_or_else(|e| {
  409. panic!(
  410. "fork: Failed to copy files from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
  411. current_pcb.pid(), pcb.pid(), e
  412. )
  413. });
  414. // 拷贝信号相关数据
  415. Self::copy_sighand(&clone_flags, current_pcb, pcb).unwrap_or_else(|e| {
  416. panic!(
  417. "fork: Failed to copy sighand from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
  418. current_pcb.pid(), pcb.pid(), e
  419. )
  420. });
  421. // 拷贝线程
  422. Self::copy_thread(current_pcb, pcb, &clone_args, current_trapframe).unwrap_or_else(|e| {
  423. panic!(
  424. "fork: Failed to copy thread from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
  425. current_pcb.pid(), pcb.pid(), e
  426. )
  427. });
  428. if current_pcb.pid() != Pid(0) {
  429. let new_pid = PidStrcut::alloc_pid(
  430. pcb.get_nsproxy().read().pid_namespace.clone(), // 获取命名空间
  431. clone_args.set_tid.clone(),
  432. )?;
  433. *pcb.thread_pid.write() = new_pid;
  434. }
  435. // 设置线程组id、组长
  436. if clone_flags.contains(CloneFlags::CLONE_THREAD) {
  437. pcb.thread.write_irqsave().group_leader =
  438. current_pcb.thread.read_irqsave().group_leader.clone();
  439. unsafe {
  440. let ptr = pcb.as_ref() as *const ProcessControlBlock as *mut ProcessControlBlock;
  441. (*ptr).tgid = current_pcb.tgid;
  442. }
  443. } else {
  444. pcb.thread.write_irqsave().group_leader = Arc::downgrade(pcb);
  445. unsafe {
  446. let ptr = pcb.as_ref() as *const ProcessControlBlock as *mut ProcessControlBlock;
  447. (*ptr).tgid = pcb.tgid;
  448. }
  449. }
  450. // CLONE_PARENT re-uses the old parent
  451. if clone_flags.contains(CloneFlags::CLONE_PARENT | CloneFlags::CLONE_THREAD) {
  452. *pcb.real_parent_pcb.write_irqsave() =
  453. current_pcb.real_parent_pcb.read_irqsave().clone();
  454. if clone_flags.contains(CloneFlags::CLONE_THREAD) {
  455. pcb.exit_signal.store(Signal::INVALID, Ordering::SeqCst);
  456. } else {
  457. let leader = current_pcb.thread.read_irqsave().group_leader();
  458. if unlikely(leader.is_none()) {
  459. panic!(
  460. "fork: Failed to get leader of current process, current pid: [{:?}]",
  461. current_pcb.pid()
  462. );
  463. }
  464. pcb.exit_signal.store(
  465. leader.unwrap().exit_signal.load(Ordering::SeqCst),
  466. Ordering::SeqCst,
  467. );
  468. }
  469. } else {
  470. // 新创建的进程,设置其父进程为当前进程
  471. *pcb.real_parent_pcb.write_irqsave() = Arc::downgrade(current_pcb);
  472. pcb.exit_signal
  473. .store(clone_args.exit_signal, Ordering::SeqCst);
  474. }
  475. // todo: 增加线程组相关的逻辑。 参考 https://code.dragonos.org.cn/xref/linux-6.1.9/kernel/fork.c#2437
  476. sched_cgroup_fork(pcb);
  477. Ok(())
  478. }
  479. }