123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314 |
- use core::arch::global_asm;
- use core::mem::size_of;
- use alloc::boxed::Box;
- use alloc::vec::Vec;
- use syscall::data::Map;
- use syscall::flag::{MapFlags, O_CLOEXEC};
- use syscall::error::{Error, Result, EINVAL, ENAMETOOLONG};
- use syscall::SIGCONT;
- use super::extra::{create_set_addr_space_buf, FdGuard};
- fn new_context() -> Result<(FdGuard, usize)> {
- // Create a new context (fields such as uid/gid will be inherited from the current context).
- let fd = FdGuard::new(syscall::open("thisproc:new/open_via_dup", O_CLOEXEC)?);
- // Extract pid.
- let mut buffer = [0_u8; 64];
- let len = syscall::fpath(*fd, &mut buffer)?;
- let buffer = buffer.get(..len).ok_or(Error::new(ENAMETOOLONG))?;
- let colon_idx = buffer.iter().position(|c| *c == b':').ok_or(Error::new(EINVAL))?;
- let slash_idx = buffer.iter().skip(colon_idx).position(|c| *c == b'/').ok_or(Error::new(EINVAL))? + colon_idx;
- let pid_bytes = buffer.get(colon_idx + 1..slash_idx).ok_or(Error::new(EINVAL))?;
- let pid_str = core::str::from_utf8(pid_bytes).map_err(|_| Error::new(EINVAL))?;
- let pid = pid_str.parse::<usize>().map_err(|_| Error::new(EINVAL))?;
- Ok((fd, pid))
- }
- fn copy_str(cur_pid_fd: usize, new_pid_fd: usize, key: &str) -> Result<()> {
- let cur_name_fd = FdGuard::new(syscall::dup(cur_pid_fd, key.as_bytes())?);
- let new_name_fd = FdGuard::new(syscall::dup(new_pid_fd, key.as_bytes())?);
- let mut buf = [0_u8; 256];
- let len = syscall::read(*cur_name_fd, &mut buf)?;
- let buf = buf.get(..len).ok_or(Error::new(ENAMETOOLONG))?;
- syscall::write(*new_name_fd, &buf)?;
- Ok(())
- }
- #[cfg(target_arch = "x86_64")]
- fn copy_env_regs(cur_pid_fd: usize, new_pid_fd: usize) -> Result<()> {
- // Copy environment registers.
- {
- let cur_env_regs_fd = FdGuard::new(syscall::dup(cur_pid_fd, b"regs/env")?);
- let new_env_regs_fd = FdGuard::new(syscall::dup(new_pid_fd, b"regs/env")?);
- let mut env_regs = syscall::EnvRegisters::default();
- let _ = syscall::read(*cur_env_regs_fd, &mut env_regs)?;
- let _ = syscall::write(*new_env_regs_fd, &env_regs)?;
- }
- Ok(())
- }
- /// Spawns a new context sharing the same address space as the current one (i.e. a new thread).
- pub unsafe fn pte_clone_impl(stack: *mut usize) -> Result<usize> {
- let cur_pid_fd = FdGuard::new(syscall::open("thisproc:current/open_via_dup", O_CLOEXEC)?);
- let (new_pid_fd, new_pid) = new_context()?;
- // Allocate a new signal stack.
- {
- let sigstack_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"sigstack")?);
- const SIGSTACK_SIZE: usize = 1024 * 256;
- // TODO: Put sigstack at high addresses?
- let target_sigstack = syscall::fmap(!0, &Map { address: 0, flags: MapFlags::PROT_READ | MapFlags::PROT_WRITE | MapFlags::MAP_PRIVATE, offset: 0, size: SIGSTACK_SIZE })? + SIGSTACK_SIZE;
- let _ = syscall::write(*sigstack_fd, &usize::to_ne_bytes(target_sigstack))?;
- }
- copy_str(*cur_pid_fd, *new_pid_fd, "name")?;
- copy_str(*cur_pid_fd, *new_pid_fd, "cwd")?;
- // Reuse existing address space
- {
- let cur_addr_space_fd = FdGuard::new(syscall::dup(*cur_pid_fd, b"addrspace")?);
- let new_addr_space_sel_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"current-addrspace")?);
- let buf = create_set_addr_space_buf(*cur_addr_space_fd, pte_clone_ret as usize, stack as usize);
- let _ = syscall::write(*new_addr_space_sel_fd, &buf)?;
- }
- // Reuse file table
- {
- let cur_filetable_fd = FdGuard::new(syscall::dup(*cur_pid_fd, b"filetable")?);
- let new_filetable_sel_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"current-filetable")?);
- let _ = syscall::write(*new_filetable_sel_fd, &usize::to_ne_bytes(*cur_filetable_fd))?;
- }
- // Reuse sigactions (on Linux, CLONE_THREAD requires CLONE_SIGHAND which implies the sigactions
- // table is reused).
- {
- let cur_sigaction_fd = FdGuard::new(syscall::dup(*cur_pid_fd, b"sigactions")?);
- let new_sigaction_sel_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"current-sigactions")?);
- let _ = syscall::write(*new_sigaction_sel_fd, &usize::to_ne_bytes(*cur_sigaction_fd))?;
- }
- copy_env_regs(*cur_pid_fd, *new_pid_fd)?;
- // Unblock context.
- syscall::kill(new_pid, SIGCONT)?;
- Ok(0)
- }
- /// Spawns a new context which will not share the same address space as the current one. File
- /// descriptors from other schemes are reobtained with `dup`, and grants referencing such file
- /// descriptors are reobtained through `fmap`. Other mappings are kept but duplicated using CoW.
- pub fn fork_impl() -> Result<usize> {
- unsafe {
- Error::demux(fork_wrapper())
- }
- }
- fn fork_inner(initial_rsp: *mut usize) -> Result<usize> {
- let (cur_filetable_fd, new_pid_fd, new_pid);
- {
- let cur_pid_fd = FdGuard::new(syscall::open("thisproc:current/open_via_dup", O_CLOEXEC)?);
- (new_pid_fd, new_pid) = new_context()?;
- // Do not allocate new signal stack, but copy existing address (all memory will be re-mapped
- // CoW later).
- {
- let cur_sigstack_fd = FdGuard::new(syscall::dup(*cur_pid_fd, b"sigstack")?);
- let new_sigstack_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"sigstack")?);
- let mut sigstack_buf = usize::to_ne_bytes(0);
- let _ = syscall::read(*cur_sigstack_fd, &mut sigstack_buf);
- let _ = syscall::write(*new_sigstack_fd, &sigstack_buf);
- }
- copy_str(*cur_pid_fd, *new_pid_fd, "name")?;
- copy_str(*cur_pid_fd, *new_pid_fd, "cwd")?;
- {
- let cur_sigaction_fd = FdGuard::new(syscall::dup(*cur_pid_fd, b"sigactions")?);
- let new_sigaction_fd = FdGuard::new(syscall::dup(*cur_sigaction_fd, b"copy")?);
- let new_sigaction_sel_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"current-sigactions")?);
- let _ = syscall::write(*new_sigaction_sel_fd, &usize::to_ne_bytes(*new_sigaction_fd))?;
- }
- // Copy existing files into new file table, but do not reuse the same file table (i.e. new
- // parent FDs will not show up for the child).
- {
- cur_filetable_fd = FdGuard::new(syscall::dup(*cur_pid_fd, b"filetable")?);
- // This must be done before the address space is copied.
- unsafe {
- initial_rsp.write(*cur_filetable_fd);
- initial_rsp.add(1).write(*new_pid_fd);
- }
- }
- // CoW-duplicate address space.
- {
- let cur_addr_space_fd = FdGuard::new(syscall::dup(*cur_pid_fd, b"addrspace")?);
- // FIXME: Find mappings which use external file descriptors
- let new_addr_space_fd = FdGuard::new(syscall::dup(*cur_addr_space_fd, b"exclusive")?);
- let mut buf = vec! [0_u8; 4096];
- let mut bytes_read = 0;
- loop {
- let new_bytes_read = syscall::read(*cur_addr_space_fd, &mut buf[bytes_read..])?;
- if new_bytes_read == 0 { break }
- bytes_read += new_bytes_read;
- }
- let bytes = &buf[..bytes_read];
- for struct_bytes in bytes.array_chunks::<{size_of::<usize>() * 4}>() {
- let mut words = struct_bytes.array_chunks::<{size_of::<usize>()}>().copied().map(usize::from_ne_bytes);
- let addr = words.next().unwrap();
- let size = words.next().unwrap();
- let flags = words.next().unwrap();
- let offset = words.next().unwrap();
- if flags & 0x8000_0000 == 0 {
- continue;
- }
- let map_flags = MapFlags::from_bits_truncate(flags);
- let mapped_address = unsafe {
- let fd = FdGuard::new(syscall::dup(*cur_addr_space_fd, format!("grant-{:x}", addr).as_bytes())?);
- syscall::fmap(*fd, &syscall::Map { address: 0, size, flags: map_flags, offset })?
- };
- let mut buf = [0_u8; size_of::<usize>() * 4];
- let mut chunks = buf.array_chunks_mut::<{size_of::<usize>()}>();
- *chunks.next().unwrap() = usize::to_ne_bytes(addr);
- *chunks.next().unwrap() = usize::to_ne_bytes(size);
- *chunks.next().unwrap() = usize::to_ne_bytes(map_flags.bits());
- *chunks.next().unwrap() = usize::to_ne_bytes(mapped_address);
- let _ = syscall::write(*new_addr_space_fd, &buf)?;
- }
- let new_addr_space_sel_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"current-addrspace")?);
- let buf = create_set_addr_space_buf(*new_addr_space_fd, fork_ret as usize, initial_rsp as usize);
- let _ = syscall::write(*new_addr_space_sel_fd, &buf)?;
- }
- copy_env_regs(*cur_pid_fd, *new_pid_fd)?;
- }
- // Copy the file table. We do this last to ensure that all previously used file descriptors are
- // closed. The only exception -- the filetable selection fd and the current filetable fd --
- // will be closed by the child process.
- {
- // TODO: Use cross_scheme_links or something similar to avoid copying the file table in the
- // kernel.
- let new_filetable_fd = FdGuard::new(syscall::dup(*cur_filetable_fd, b"copy")?);
- let new_filetable_sel_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"current-filetable")?);
- let _ = syscall::write(*new_filetable_sel_fd, &usize::to_ne_bytes(*new_filetable_fd));
- }
- // Unblock context.
- syscall::kill(new_pid, SIGCONT)?;
- Ok(new_pid)
- }
- #[no_mangle]
- unsafe extern "sysv64" fn __relibc_internal_fork_impl(initial_rsp: *mut usize) -> usize {
- Error::mux(fork_inner(initial_rsp))
- }
- #[no_mangle]
- unsafe extern "sysv64" fn __relibc_internal_fork_hook(cur_filetable_fd: usize, new_pid_fd: usize) {
- let _ = syscall::close(cur_filetable_fd);
- let _ = syscall::close(new_pid_fd);
- }
// Assembly trampolines used by `fork_impl` and `pte_clone_impl`.
//
// No `#[no_mangle]` is attached to this macro invocation: the attribute has no effect there, the
// symbol names are defined by the assembly text itself.
//
// `fork_wrapper`
//   Saves rbp, rbx and r12-r15 (rbp is pushed a second time, which keeps the stack 16-byte
//   aligned at the call), reserves a 32-byte scratch area and stores MXCSR at [rsp+16] and the
//   x87 control word at [rsp+24]. The base of the scratch area is passed in rdi to
//   `__relibc_internal_fork_impl`, whose return value stays in rax. Control then jumps to the
//   shared epilogue at label `2`.
//
// `fork_ret`
//   Address handed to `create_set_addr_space_buf` by `fork_inner` together with the scratch area
//   pointer. It loads the two words at [rsp] and [rsp + 8] (written by `fork_inner`) as arguments
//   for `__relibc_internal_fork_hook`, restores MXCSR and the x87 control word, zeroes rax and
//   falls through into the shared epilogue, so this path returns 0 from `fork_wrapper`.
//   NOTE(review): unlike the other two symbols, `fork_ret` is not declared `.globl` although it
//   is referenced from Rust through an `extern` block; confirm this is intended.
//
// `pte_clone_ret`
//   Address handed to `create_set_addr_space_buf` by `pte_clone_impl`. Pops the entry point into
//   rax and six arguments into rdi, rsi, rdx, rcx, r8 and r9, calls the entry point and then
//   returns to whatever address follows on the stack.
core::arch::global_asm!("
    .p2align 6
    .globl fork_wrapper
    .type fork_wrapper, @function
fork_wrapper:
    push rbp
    mov rbp, rsp

    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15

    sub rsp, 32

    stmxcsr [rsp+16]
    fnstcw [rsp+24]

    mov rdi, rsp
    call __relibc_internal_fork_impl
    jmp 2f

fork_ret:
    mov rdi, [rsp]
    mov rsi, [rsp + 8]
    call __relibc_internal_fork_hook

    ldmxcsr [rsp+16]
    fldcw [rsp+24]

    xor rax, rax

2:
    add rsp, 32

    pop r15
    pop r14
    pop r13
    pop r12
    pop rbp
    pop rbx

    pop rbp
    ret

    .size fork_wrapper, . - fork_wrapper

    .globl pte_clone_ret
    .type pte_clone_ret, @function
pte_clone_ret:

    # Load registers
    pop rax
    pop rdi
    pop rsi
    pop rdx
    pop rcx
    pop r8
    pop r9

    # Call entry point
    call rax

    ret
    .size pte_clone_ret, . - pte_clone_ret
");
// Symbols defined by the `global_asm!` block of this file.
extern "sysv64" {
    /// Address at which a context created by `pte_clone_impl` starts executing. Only its address
    /// is used from Rust.
    fn pte_clone_ret();
    /// Address at which a context created by `fork_inner` starts executing. Only its address is
    /// used from Rust.
    fn fork_ret();
    /// Runs the fork sequence and returns the result of `__relibc_internal_fork_impl` (an
    /// `Error::mux`-encoded value) in the calling context, or 0 when reached through `fork_ret`.
    fn fork_wrapper() -> usize;
}
|