syscall.rs 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613
  1. use core::ffi::c_void;
  2. use alloc::{
  3. ffi::CString,
  4. string::{String, ToString},
  5. sync::Arc,
  6. vec::Vec,
  7. };
  8. use log::error;
  9. use system_error::SystemError;
  10. use super::{
  11. abi::WaitOption,
  12. cred::{Kgid, Kuid},
  13. exec::{load_binary_file, ExecParam, ExecParamFlags},
  14. exit::kernel_wait4,
  15. fork::{CloneFlags, KernelCloneArgs},
  16. resource::{RLimit64, RLimitID, RUsage, RUsageWho},
  17. KernelStack, Pid, ProcessManager,
  18. };
  19. use crate::{
  20. arch::{interrupt::TrapFrame, CurrentIrqArch, MMArch},
  21. exception::InterruptArch,
  22. filesystem::{
  23. procfs::procfs_register_pid,
  24. vfs::{file::FileDescriptorVec, MAX_PATHLEN},
  25. },
  26. mm::{
  27. ucontext::{AddressSpace, UserStack},
  28. verify_area, MemoryManagementArch, VirtAddr,
  29. },
  30. process::ProcessControlBlock,
  31. sched::completion::Completion,
  32. syscall::{
  33. user_access::{check_and_clone_cstr, check_and_clone_cstr_array, UserBufferWriter},
  34. Syscall,
  35. },
  36. };
  37. //参考资料:https://code.dragonos.org.cn/xref/linux-6.1.9/include/uapi/linux/utsname.h#17
  38. #[repr(C)]
  39. #[derive(Debug, Clone, Copy)]
  40. pub struct PosixOldUtsName {
  41. pub sysname: [u8; 65],
  42. pub nodename: [u8; 65],
  43. pub release: [u8; 65],
  44. pub version: [u8; 65],
  45. pub machine: [u8; 65],
  46. }
  47. impl PosixOldUtsName {
  48. pub fn new() -> Self {
  49. const SYS_NAME: &[u8] = b"DragonOS";
  50. const NODENAME: &[u8] = b"DragonOS";
  51. const RELEASE: &[u8] = env!("CARGO_PKG_VERSION").as_bytes();
  52. const VERSION: &[u8] = env!("CARGO_PKG_VERSION").as_bytes();
  53. #[cfg(target_arch = "x86_64")]
  54. const MACHINE: &[u8] = b"x86_64";
  55. #[cfg(target_arch = "aarch64")]
  56. const MACHINE: &[u8] = b"aarch64";
  57. #[cfg(target_arch = "riscv64")]
  58. const MACHINE: &[u8] = b"riscv64";
  59. let mut r = Self {
  60. sysname: [0; 65],
  61. nodename: [0; 65],
  62. release: [0; 65],
  63. version: [0; 65],
  64. machine: [0; 65],
  65. };
  66. r.sysname[0..SYS_NAME.len()].copy_from_slice(SYS_NAME);
  67. r.nodename[0..NODENAME.len()].copy_from_slice(NODENAME);
  68. r.release[0..RELEASE.len()].copy_from_slice(RELEASE);
  69. r.version[0..VERSION.len()].copy_from_slice(VERSION);
  70. r.machine[0..MACHINE.len()].copy_from_slice(MACHINE);
  71. return r;
  72. }
  73. }
  74. impl Syscall {
  75. pub fn fork(frame: &TrapFrame) -> Result<usize, SystemError> {
  76. ProcessManager::fork(frame, CloneFlags::empty()).map(|pid| pid.into())
  77. }
  78. pub fn vfork(frame: &TrapFrame) -> Result<usize, SystemError> {
  79. // 由于Linux vfork需要保证子进程先运行(除非子进程调用execve或者exit),
  80. // 而我们目前没有实现这个特性,所以暂时使用fork代替vfork(linux文档表示这样也是也可以的)
  81. Self::fork(frame)
  82. // 下面是以前的实现,除非我们实现了子进程先运行的特性,否则不要使用,不然会导致父进程数据损坏
  83. // ProcessManager::fork(
  84. // frame,
  85. // CloneFlags::CLONE_VM | CloneFlags::CLONE_FS | CloneFlags::CLONE_SIGNAL,
  86. // )
  87. // .map(|pid| pid.into())
  88. }
  89. pub fn execve(
  90. path: *const u8,
  91. argv: *const *const u8,
  92. envp: *const *const u8,
  93. frame: &mut TrapFrame,
  94. ) -> Result<(), SystemError> {
  95. // debug!(
  96. // "execve path: {:?}, argv: {:?}, envp: {:?}\n",
  97. // path,
  98. // argv,
  99. // envp
  100. // );
  101. // debug!(
  102. // "before execve: strong count: {}",
  103. // Arc::strong_count(&ProcessManager::current_pcb())
  104. // );
  105. if path.is_null() {
  106. return Err(SystemError::EINVAL);
  107. }
  108. let x = || {
  109. let path: CString = check_and_clone_cstr(path, Some(MAX_PATHLEN))?;
  110. let argv: Vec<CString> = check_and_clone_cstr_array(argv)?;
  111. let envp: Vec<CString> = check_and_clone_cstr_array(envp)?;
  112. Ok((path, argv, envp))
  113. };
  114. let (path, argv, envp) = x().inspect_err(|e: &SystemError| {
  115. error!("Failed to execve: {:?}", e);
  116. })?;
  117. let path = path.into_string().map_err(|_| SystemError::EINVAL)?;
  118. ProcessManager::current_pcb()
  119. .basic_mut()
  120. .set_name(ProcessControlBlock::generate_name(&path, &argv));
  121. Self::do_execve(path, argv, envp, frame)?;
  122. // 关闭设置了O_CLOEXEC的文件描述符
  123. let fd_table = ProcessManager::current_pcb().fd_table();
  124. fd_table.write().close_on_exec();
  125. // debug!(
  126. // "after execve: strong count: {}",
  127. // Arc::strong_count(&ProcessManager::current_pcb())
  128. // );
  129. return Ok(());
  130. }
  131. pub fn do_execve(
  132. path: String,
  133. argv: Vec<CString>,
  134. envp: Vec<CString>,
  135. regs: &mut TrapFrame,
  136. ) -> Result<(), SystemError> {
  137. let address_space = AddressSpace::new(true).expect("Failed to create new address space");
  138. // debug!("to load binary file");
  139. let mut param = ExecParam::new(path.as_str(), address_space.clone(), ExecParamFlags::EXEC)?;
  140. let old_vm = do_execve_switch_user_vm(address_space.clone());
  141. // 加载可执行文件
  142. let load_result = load_binary_file(&mut param).inspect_err(|_| {
  143. if let Some(old_vm) = old_vm {
  144. do_execve_switch_user_vm(old_vm);
  145. }
  146. })?;
  147. // debug!("load binary file done");
  148. // debug!("argv: {:?}, envp: {:?}", argv, envp);
  149. param.init_info_mut().args = argv;
  150. param.init_info_mut().envs = envp;
  151. // 把proc_init_info写到用户栈上
  152. let mut ustack_message = unsafe {
  153. address_space
  154. .write()
  155. .user_stack_mut()
  156. .expect("No user stack found")
  157. .clone_info_only()
  158. };
  159. let (user_sp, argv_ptr) = unsafe {
  160. param
  161. .init_info()
  162. .push_at(
  163. // address_space
  164. // .write()
  165. // .user_stack_mut()
  166. // .expect("No user stack found"),
  167. &mut ustack_message,
  168. )
  169. .expect("Failed to push proc_init_info to user stack")
  170. };
  171. address_space.write().user_stack = Some(ustack_message);
  172. Self::arch_do_execve(regs, &param, &load_result, user_sp, argv_ptr)
  173. }
  174. pub fn wait4(
  175. pid: i64,
  176. wstatus: *mut i32,
  177. options: i32,
  178. rusage: *mut c_void,
  179. ) -> Result<usize, SystemError> {
  180. let options = WaitOption::from_bits(options as u32).ok_or(SystemError::EINVAL)?;
  181. let wstatus_buf = if wstatus.is_null() {
  182. None
  183. } else {
  184. Some(UserBufferWriter::new(
  185. wstatus,
  186. core::mem::size_of::<i32>(),
  187. true,
  188. )?)
  189. };
  190. let mut tmp_rusage = if rusage.is_null() {
  191. None
  192. } else {
  193. Some(RUsage::default())
  194. };
  195. let r = kernel_wait4(pid, wstatus_buf, options, tmp_rusage.as_mut())?;
  196. if !rusage.is_null() {
  197. let mut rusage_buf = UserBufferWriter::new::<RUsage>(
  198. rusage as *mut RUsage,
  199. core::mem::size_of::<RUsage>(),
  200. true,
  201. )?;
  202. rusage_buf.copy_one_to_user(&tmp_rusage.unwrap(), 0)?;
  203. }
  204. return Ok(r);
  205. }
  206. /// # 退出进程
  207. ///
  208. /// ## 参数
  209. ///
  210. /// - status: 退出状态
  211. pub fn exit(status: usize) -> ! {
  212. ProcessManager::exit(status);
  213. }
  214. /// @brief 获取当前进程的pid
  215. pub fn getpid() -> Result<Pid, SystemError> {
  216. let current_pcb = ProcessManager::current_pcb();
  217. // if let Some(pid_ns) = &current_pcb.get_nsproxy().read().pid_namespace {
  218. // // 获取该进程在命名空间中的 PID
  219. // return Ok(current_pcb.pid_strcut().read().numbers[pid_ns.level].nr);
  220. // // 返回命名空间中的 PID
  221. // }
  222. // 默认返回 tgid
  223. Ok(current_pcb.tgid())
  224. }
  225. /// @brief 获取指定进程的pgid
  226. ///
  227. /// @param pid 指定一个进程号
  228. ///
  229. /// @return 成功,指定进程的进程组id
  230. /// @return 错误,不存在该进程
  231. pub fn getpgid(mut pid: Pid) -> Result<Pid, SystemError> {
  232. if pid == Pid(0) {
  233. let current_pcb = ProcessManager::current_pcb();
  234. pid = current_pcb.pid();
  235. }
  236. let target_proc = ProcessManager::find(pid).ok_or(SystemError::ESRCH)?;
  237. return Ok(target_proc.basic().pgid());
  238. }
  239. /// @brief 获取当前进程的父进程id
  240. ///
  241. /// 若为initproc则ppid设置为0
  242. pub fn getppid() -> Result<Pid, SystemError> {
  243. let current_pcb = ProcessManager::current_pcb();
  244. return Ok(current_pcb.basic().ppid());
  245. }
  246. pub fn clone(
  247. current_trapframe: &TrapFrame,
  248. clone_args: KernelCloneArgs,
  249. ) -> Result<usize, SystemError> {
  250. let flags = clone_args.flags;
  251. let vfork = Arc::new(Completion::new());
  252. if flags.contains(CloneFlags::CLONE_PIDFD)
  253. && flags.contains(CloneFlags::CLONE_PARENT_SETTID)
  254. {
  255. return Err(SystemError::EINVAL);
  256. }
  257. let current_pcb = ProcessManager::current_pcb();
  258. let new_kstack = KernelStack::new()?;
  259. let name = current_pcb.basic().name().to_string();
  260. let pcb = ProcessControlBlock::new(name, new_kstack);
  261. // 克隆pcb
  262. ProcessManager::copy_process(&current_pcb, &pcb, clone_args, current_trapframe)?;
  263. ProcessManager::add_pcb(pcb.clone());
  264. // 向procfs注册进程
  265. procfs_register_pid(pcb.pid()).unwrap_or_else(|e| {
  266. panic!(
  267. "fork: Failed to register pid to procfs, pid: [{:?}]. Error: {:?}",
  268. pcb.pid(),
  269. e
  270. )
  271. });
  272. if flags.contains(CloneFlags::CLONE_VFORK) {
  273. pcb.thread.write_irqsave().vfork_done = Some(vfork.clone());
  274. }
  275. if pcb.thread.read_irqsave().set_child_tid.is_some() {
  276. let addr = pcb.thread.read_irqsave().set_child_tid.unwrap();
  277. let mut writer =
  278. UserBufferWriter::new(addr.as_ptr::<i32>(), core::mem::size_of::<i32>(), true)?;
  279. writer.copy_one_to_user(&(pcb.pid().data() as i32), 0)?;
  280. }
  281. ProcessManager::wakeup(&pcb).unwrap_or_else(|e| {
  282. panic!(
  283. "fork: Failed to wakeup new process, pid: [{:?}]. Error: {:?}",
  284. pcb.pid(),
  285. e
  286. )
  287. });
  288. if flags.contains(CloneFlags::CLONE_VFORK) {
  289. // 等待子进程结束或者exec;
  290. vfork.wait_for_completion_interruptible()?;
  291. }
  292. return Ok(pcb.pid().0);
  293. }
  294. /// 设置线程地址
  295. pub fn set_tid_address(ptr: usize) -> Result<usize, SystemError> {
  296. verify_area(VirtAddr::new(ptr), core::mem::size_of::<i32>())
  297. .map_err(|_| SystemError::EFAULT)?;
  298. let pcb = ProcessManager::current_pcb();
  299. pcb.thread.write_irqsave().clear_child_tid = Some(VirtAddr::new(ptr));
  300. Ok(pcb.pid.0)
  301. }
  302. pub fn gettid() -> Result<Pid, SystemError> {
  303. let pcb = ProcessManager::current_pcb();
  304. Ok(pcb.pid)
  305. }
  306. pub fn getuid() -> Result<usize, SystemError> {
  307. let pcb = ProcessManager::current_pcb();
  308. return Ok(pcb.cred.lock().uid.data());
  309. }
  310. pub fn getgid() -> Result<usize, SystemError> {
  311. let pcb = ProcessManager::current_pcb();
  312. return Ok(pcb.cred.lock().gid.data());
  313. }
  314. pub fn geteuid() -> Result<usize, SystemError> {
  315. let pcb = ProcessManager::current_pcb();
  316. return Ok(pcb.cred.lock().euid.data());
  317. }
  318. pub fn getegid() -> Result<usize, SystemError> {
  319. let pcb = ProcessManager::current_pcb();
  320. return Ok(pcb.cred.lock().egid.data());
  321. }
  322. pub fn setuid(uid: usize) -> Result<usize, SystemError> {
  323. let pcb = ProcessManager::current_pcb();
  324. let mut guard = pcb.cred.lock();
  325. if guard.uid.data() == 0 {
  326. guard.setuid(uid);
  327. guard.seteuid(uid);
  328. guard.setsuid(uid);
  329. } else if uid == guard.uid.data() || uid == guard.suid.data() {
  330. guard.seteuid(uid);
  331. } else {
  332. return Err(SystemError::EPERM);
  333. }
  334. return Ok(0);
  335. }
  336. pub fn setgid(gid: usize) -> Result<usize, SystemError> {
  337. let pcb = ProcessManager::current_pcb();
  338. let mut guard = pcb.cred.lock();
  339. if guard.egid.data() == 0 {
  340. guard.setgid(gid);
  341. guard.setegid(gid);
  342. guard.setsgid(gid);
  343. guard.setfsgid(gid);
  344. } else if guard.gid.data() == gid || guard.sgid.data() == gid {
  345. guard.setegid(gid);
  346. guard.setfsgid(gid);
  347. } else {
  348. return Err(SystemError::EPERM);
  349. }
  350. return Ok(0);
  351. }
  352. pub fn seteuid(euid: usize) -> Result<usize, SystemError> {
  353. let pcb = ProcessManager::current_pcb();
  354. let mut guard = pcb.cred.lock();
  355. if euid == usize::MAX || (euid == guard.euid.data() && euid == guard.fsuid.data()) {
  356. return Ok(0);
  357. }
  358. if euid != usize::MAX {
  359. guard.seteuid(euid);
  360. }
  361. let euid = guard.euid.data();
  362. guard.setfsuid(euid);
  363. return Ok(0);
  364. }
  365. pub fn setegid(egid: usize) -> Result<usize, SystemError> {
  366. let pcb = ProcessManager::current_pcb();
  367. let mut guard = pcb.cred.lock();
  368. if egid == usize::MAX || (egid == guard.egid.data() && egid == guard.fsgid.data()) {
  369. return Ok(0);
  370. }
  371. if egid != usize::MAX {
  372. guard.setegid(egid);
  373. }
  374. let egid = guard.egid.data();
  375. guard.setfsgid(egid);
  376. return Ok(0);
  377. }
  378. pub fn setfsuid(fsuid: usize) -> Result<usize, SystemError> {
  379. let fsuid = Kuid::new(fsuid);
  380. let pcb = ProcessManager::current_pcb();
  381. let mut guard = pcb.cred.lock();
  382. let old_fsuid = guard.fsuid;
  383. if fsuid == guard.uid || fsuid == guard.euid || fsuid == guard.suid {
  384. guard.setfsuid(fsuid.data());
  385. }
  386. Ok(old_fsuid.data())
  387. }
  388. pub fn setfsgid(fsgid: usize) -> Result<usize, SystemError> {
  389. let fsgid = Kgid::new(fsgid);
  390. let pcb = ProcessManager::current_pcb();
  391. let mut guard = pcb.cred.lock();
  392. let old_fsgid = guard.fsgid;
  393. if fsgid == guard.gid || fsgid == guard.egid || fsgid == guard.sgid {
  394. guard.setfsgid(fsgid.data());
  395. }
  396. Ok(old_fsgid.data())
  397. }
  398. pub fn get_rusage(who: i32, rusage: *mut RUsage) -> Result<usize, SystemError> {
  399. let who = RUsageWho::try_from(who)?;
  400. let mut writer = UserBufferWriter::new(rusage, core::mem::size_of::<RUsage>(), true)?;
  401. let pcb = ProcessManager::current_pcb();
  402. let rusage = pcb.get_rusage(who).ok_or(SystemError::EINVAL)?;
  403. let ubuf = writer.buffer::<RUsage>(0).unwrap();
  404. ubuf.copy_from_slice(&[rusage]);
  405. return Ok(0);
  406. }
  407. /// # 设置资源限制
  408. ///
  409. /// TODO: 目前暂时不支持设置资源限制,只提供读取默认值的功能
  410. ///
  411. /// ## 参数
  412. ///
  413. /// - pid: 进程号
  414. /// - resource: 资源类型
  415. /// - new_limit: 新的资源限制
  416. /// - old_limit: 旧的资源限制
  417. ///
  418. /// ## 返回值
  419. ///
  420. /// - 成功,0
  421. /// - 如果old_limit不为NULL,则返回旧的资源限制到old_limit
  422. ///
  423. pub fn prlimit64(
  424. _pid: Pid,
  425. resource: usize,
  426. _new_limit: *const RLimit64,
  427. old_limit: *mut RLimit64,
  428. ) -> Result<usize, SystemError> {
  429. let resource = RLimitID::try_from(resource)?;
  430. let mut writer = None;
  431. if !old_limit.is_null() {
  432. writer = Some(UserBufferWriter::new(
  433. old_limit,
  434. core::mem::size_of::<RLimit64>(),
  435. true,
  436. )?);
  437. }
  438. match resource {
  439. RLimitID::Stack => {
  440. if let Some(mut writer) = writer {
  441. let mut rlimit = writer.buffer::<RLimit64>(0).unwrap()[0];
  442. rlimit.rlim_cur = UserStack::DEFAULT_USER_STACK_SIZE as u64;
  443. rlimit.rlim_max = UserStack::DEFAULT_USER_STACK_SIZE as u64;
  444. }
  445. return Ok(0);
  446. }
  447. RLimitID::Nofile => {
  448. if let Some(mut writer) = writer {
  449. let mut rlimit = writer.buffer::<RLimit64>(0).unwrap()[0];
  450. rlimit.rlim_cur = FileDescriptorVec::PROCESS_MAX_FD as u64;
  451. rlimit.rlim_max = FileDescriptorVec::PROCESS_MAX_FD as u64;
  452. }
  453. return Ok(0);
  454. }
  455. RLimitID::As | RLimitID::Rss => {
  456. if let Some(mut writer) = writer {
  457. let mut rlimit = writer.buffer::<RLimit64>(0).unwrap()[0];
  458. rlimit.rlim_cur = MMArch::USER_END_VADDR.data() as u64;
  459. rlimit.rlim_max = MMArch::USER_END_VADDR.data() as u64;
  460. }
  461. return Ok(0);
  462. }
  463. _ => {
  464. return Err(SystemError::ENOSYS);
  465. }
  466. }
  467. }
  468. pub fn uname(name: *mut PosixOldUtsName) -> Result<usize, SystemError> {
  469. let mut writer =
  470. UserBufferWriter::new(name, core::mem::size_of::<PosixOldUtsName>(), true)?;
  471. writer.copy_one_to_user(&PosixOldUtsName::new(), 0)?;
  472. return Ok(0);
  473. }
  474. }
  475. /// 切换用户虚拟内存空间
  476. ///
  477. /// 该函数用于在执行系统调用 `execve` 时切换用户进程的虚拟内存空间。
  478. ///
  479. /// # 参数
  480. /// - `new_vm`: 新的用户地址空间,类型为 `Arc<AddressSpace>`。
  481. ///
  482. /// # 返回值
  483. /// - 返回旧的用户地址空间的引用,类型为 `Option<Arc<AddressSpace>>`。
  484. ///
  485. /// # 错误处理
  486. /// 如果地址空间切换失败,函数会触发断言失败,并输出错误信息。
  487. fn do_execve_switch_user_vm(new_vm: Arc<AddressSpace>) -> Option<Arc<AddressSpace>> {
  488. // 关中断,防止在设置地址空间的时候,发生中断,然后进调度器,出现错误。
  489. let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
  490. let pcb = ProcessManager::current_pcb();
  491. // log::debug!(
  492. // "pid: {:?} do_execve: path: {:?}, argv: {:?}, envp: {:?}\n",
  493. // pcb.pid(),
  494. // path,
  495. // argv,
  496. // envp
  497. // );
  498. let mut basic_info = pcb.basic_mut();
  499. // 暂存原本的用户地址空间的引用(因为如果在切换页表之前释放了它,可能会造成内存use after free)
  500. let old_address_space = basic_info.user_vm();
  501. // 在pcb中原来的用户地址空间
  502. unsafe {
  503. basic_info.set_user_vm(None);
  504. }
  505. // 创建新的地址空间并设置为当前地址空间
  506. unsafe {
  507. basic_info.set_user_vm(Some(new_vm.clone()));
  508. }
  509. // to avoid deadlock
  510. drop(basic_info);
  511. assert!(
  512. AddressSpace::is_current(&new_vm),
  513. "Failed to set address space"
  514. );
  515. // debug!("Switch to new address space");
  516. // 切换到新的用户地址空间
  517. unsafe { new_vm.read().user_mapper.utable.make_current() };
  518. drop(irq_guard);
  519. old_address_space
  520. }