Browse Source

Implement fexec in userspace.

4lDO2 3 years ago
parent
commit
2186cd1fbd
10 changed files with 498 additions and 94 deletions
  1. 4 0
      .gitmodules
  2. 62 53
      Cargo.lock
  3. 2 1
      Cargo.toml
  4. 7 22
      Makefile
  5. 22 0
      renamesyms.sh
  6. 16 0
      src/ld_so/tcb.rs
  7. 271 0
      src/platform/redox/exec.rs
  8. 57 2
      src/platform/redox/extra.rs
  9. 45 16
      src/platform/redox/mod.rs
  10. 12 0
      src/start.rs

+ 4 - 0
.gitmodules

@@ -13,3 +13,7 @@
 [submodule "pthreads-emb"]
 	path = pthreads-emb
 	url = https://gitlab.redox-os.org/redox-os/pthreads-emb.git
+[submodule "compiler-builtins"]
+	path = compiler-builtins
+	url = https://gitlab.redox-os.org/redox-os/compiler-builtins.git
+	branch = relibc_fix_dup_symbols

+ 62 - 53
Cargo.lock

@@ -42,11 +42,11 @@ checksum = "2db2df1ebc842c41fd2c4ae5b5a577faf63bd5151b953db752fc686812bee318"
 dependencies = [
  "clap",
  "log",
- "proc-macro2 1.0.36",
- "quote 1.0.16",
+ "proc-macro2 1.0.42",
+ "quote 1.0.20",
  "serde",
  "serde_json",
- "syn 1.0.89",
+ "syn 1.0.98",
  "tempfile",
  "toml",
 ]
@@ -108,9 +108,9 @@ version = "0.1.0"
 
 [[package]]
 name = "fastrand"
-version = "1.7.0"
+version = "1.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf"
+checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499"
 dependencies = [
  "instant",
 ]
@@ -146,9 +146,9 @@ dependencies = [
 
 [[package]]
 name = "itoa"
-version = "1.0.1"
+version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
+checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d"
 
 [[package]]
 name = "lazy_static"
@@ -165,33 +165,34 @@ version = "0.1.0"
 
 [[package]]
 name = "libc"
-version = "0.2.121"
+version = "0.2.126"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f"
+checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
 
 [[package]]
 name = "lock_api"
-version = "0.4.6"
+version = "0.4.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "88943dd7ef4a2e5a4bfa2753aaab3013e34ce2533d1996fb18ef591e315e2b3b"
+checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53"
 dependencies = [
+ "autocfg",
  "scopeguard",
 ]
 
 [[package]]
 name = "log"
-version = "0.4.16"
+version = "0.4.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6389c490849ff5bc16be905ae24bc913a9c8892e19b2341dbc175e14c341c2b8"
+checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
 dependencies = [
  "cfg-if",
 ]
 
 [[package]]
 name = "memchr"
-version = "2.4.1"
+version = "2.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
+checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
 
 [[package]]
 name = "memoffset"
@@ -204,9 +205,9 @@ dependencies = [
 
 [[package]]
 name = "num-traits"
-version = "0.2.14"
+version = "0.2.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
+checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
 dependencies = [
  "autocfg",
 ]
@@ -227,16 +228,16 @@ version = "0.4.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759"
 dependencies = [
- "unicode-xid 0.1.0",
+ "unicode-xid",
 ]
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.36"
+version = "1.0.42"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029"
+checksum = "c278e965f1d8cf32d6e0e96de3d3e79712178ae67986d9cf9151f51e95aac89b"
 dependencies = [
- "unicode-xid 0.2.2",
+ "unicode-ident",
 ]
 
 [[package]]
@@ -250,11 +251,11 @@ dependencies = [
 
 [[package]]
 name = "quote"
-version = "1.0.16"
+version = "1.0.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b4af2ec4714533fcdf07e886f17025ace8b997b9ce51204ee69b6da831c3da57"
+checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804"
 dependencies = [
- "proc-macro2 1.0.36",
+ "proc-macro2 1.0.42",
 ]
 
 [[package]]
@@ -303,6 +304,14 @@ version = "0.1.57"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
 
+[[package]]
+name = "redox_syscall"
+version = "0.2.13"
+source = "git+https://gitlab.redox-os.org/4lDO2/syscall.git?branch=userspace_fexec#676b758a4f8de3fa7f22d72f84c0f7b05152c5df"
+dependencies = [
+ "bitflags",
+]
+
 [[package]]
 name = "redox_syscall"
 version = "0.2.15"
@@ -327,9 +336,9 @@ dependencies = [
  "posix-regex",
  "ralloc",
  "rand",
- "redox_syscall 0.2.15",
+ "redox_syscall 0.2.13",
  "sc",
- "spin 0.9.2",
+ "spin 0.9.4",
 ]
 
 [[package]]
@@ -361,9 +370,9 @@ dependencies = [
 
 [[package]]
 name = "ryu"
-version = "1.0.9"
+version = "1.0.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"
+checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695"
 
 [[package]]
 name = "sc"
@@ -421,29 +430,29 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
 
 [[package]]
 name = "serde"
-version = "1.0.136"
+version = "1.0.140"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789"
+checksum = "fc855a42c7967b7c369eb5860f7164ef1f6f81c20c7cc1141f2a604e18723b03"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.136"
+version = "1.0.140"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9"
+checksum = "6f2122636b9fe3b81f1cb25099fcf2d3f542cdb1d45940d56c713158884a05da"
 dependencies = [
- "proc-macro2 1.0.36",
- "quote 1.0.16",
- "syn 1.0.89",
+ "proc-macro2 1.0.42",
+ "quote 1.0.20",
+ "syn 1.0.98",
 ]
 
 [[package]]
 name = "serde_json"
-version = "1.0.79"
+version = "1.0.82"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95"
+checksum = "82c2c1fdcd807d1098552c5b9a36e425e42e9fbd7c6a37a8425f390f781f7fa7"
 dependencies = [
  "itoa",
  "ryu",
@@ -458,9 +467,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
 
 [[package]]
 name = "spin"
-version = "0.9.2"
+version = "0.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "511254be0c5bcf062b019a6c89c01a664aa359ded62f78aa72c6fc137c0590e5"
+checksum = "7f6002a767bff9e83f8eeecf883ecb8011875a21ae8da43bffb817a57e78cc09"
 dependencies = [
  "lock_api",
 ]
@@ -479,18 +488,18 @@ checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5"
 dependencies = [
  "proc-macro2 0.4.30",
  "quote 0.6.13",
- "unicode-xid 0.1.0",
+ "unicode-xid",
 ]
 
 [[package]]
 name = "syn"
-version = "1.0.89"
+version = "1.0.98"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ea297be220d52398dcc07ce15a209fce436d361735ac1db700cab3b6cdfb9f54"
+checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd"
 dependencies = [
- "proc-macro2 1.0.36",
- "quote 1.0.16",
- "unicode-xid 0.2.2",
+ "proc-macro2 1.0.42",
+ "quote 1.0.20",
+ "unicode-ident",
 ]
 
 [[package]]
@@ -518,9 +527,9 @@ dependencies = [
 
 [[package]]
 name = "toml"
-version = "0.5.8"
+version = "0.5.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a31142970826733df8241ef35dc040ef98c679ab14d7c3e54d827099b3acecaa"
+checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7"
 dependencies = [
  "serde",
 ]
@@ -531,6 +540,12 @@ version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e92e959f029e4f8ee25d70d15ab58d2b46f98a17bc238b9265ff0c26f6f3d67f"
 
+[[package]]
+name = "unicode-ident"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7"
+
 [[package]]
 name = "unicode-width"
 version = "0.1.9"
@@ -543,12 +558,6 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
 
-[[package]]
-name = "unicode-xid"
-version = "0.2.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
-
 [[package]]
 name = "vec_map"
 version = "0.8.2"

+ 2 - 1
Cargo.toml

@@ -39,7 +39,8 @@ optional = true
 sc = "0.2.3"
 
 [target.'cfg(target_os = "redox")'.dependencies]
-redox_syscall = "0.2.15"
+#redox_syscall = "0.2.15"
+redox_syscall = { git = "https://gitlab.redox-os.org/4lDO2/syscall.git", branch = "userspace_fexec" }
 spin = "0.9.0"
 
 [features]

+ 7 - 22
Makefile

@@ -2,13 +2,11 @@ TARGET?=$(shell rustc -Z unstable-options --print target-spec-json | grep llvm-t
 
 CARGO?=cargo
 CARGO_TEST?=$(CARGO)
-CARGOFLAGS?=-Z build-std=core,alloc,compiler_builtins
+CARGO_COMMON_FLAGS=-Z build-std=core,alloc,compiler_builtins
+CARGOFLAGS?=$(CARGO_COMMON_FLAGS)
 RUSTCFLAGS?=
 export OBJCOPY?=objcopy
 
-# When using xargo, build it in local location
-export XARGO_HOME=$(CURDIR)/target/xargo
-
 BUILD="target/$(TARGET)"
 CARGOFLAGS+="--target=$(TARGET)"
 
@@ -51,21 +49,7 @@ SRC=\
 	Cargo.* \
 	$(shell find src -type f)
 
-# FIXME: Remove the following line. It's only required since xargo automatically links with compiler_builtins, which conflicts with the compiler_builtins that rustc always links with.
-WEAKEN_SYMBOLS=\
-	-W __divti3 \
-	-W __fixdfti \
-	-W __floattidf \
-	-W __muloti4 \
-	-W __udivti3 \
-	-W __umodti3 \
-	-W __rust_probestack \
-	-W __rust_alloc \
-	-W __rust_alloc_zeroed \
-	-W __rust_dealloc \
-	-W __rust_realloc \
-	-W __rdl_oom \
-	-W __rg_oom
+BUILTINS_VERSION=0.1.70
 
 .PHONY: all clean fmt install install-headers libs submodules test
 
@@ -146,8 +130,7 @@ $(BUILD)/debug/libc.so: $(BUILD)/debug/librelibc.a $(BUILD)/pthreads-emb/libpthr
 
 $(BUILD)/debug/librelibc.a: $(SRC)
 	CARGO_INCREMENTAL=0 $(CARGO) rustc $(CARGOFLAGS) -- --emit link=$@ $(RUSTCFLAGS)
-	# FIXME: Remove the following line. It's only required since xargo automatically links with compiler_builtins, which conflicts with the compiler_builtins that rustc always links with.
-	$(OBJCOPY) $@ $(WEAKEN_SYMBOLS)
+	./renamesyms.sh $@ $(BUILD)/debug/deps/
 	touch $@
 
 $(BUILD)/debug/crt0.o: $(SRC)
@@ -185,7 +168,9 @@ $(BUILD)/release/libc.so: $(BUILD)/release/librelibc.a $(BUILD)/pthreads-emb/lib
 
 $(BUILD)/release/librelibc.a: $(SRC)
 	CARGO_INCREMENTAL=0 $(CARGO) rustc --release $(CARGOFLAGS) -- --emit link=$@ $(RUSTCFLAGS)
-	$(OBJCOPY) $@ $(WEAKEN_SYMBOLS)
+	# TODO: Better to only allow a certain whitelisted set of symbols? Perhaps
+	# use some cbindgen hook, specify them manually, or grep for #[no_mangle].
+	./renamesyms.sh $@ $(BUILD)/release/deps/
 	touch $@
 
 $(BUILD)/release/crt0.o: $(SRC)

+ 22 - 0
renamesyms.sh

@@ -0,0 +1,22 @@
+#!/bin/sh
+target=$1
+deps_dir=$2
+
+if [ -z "$target" ] || [ -z "$deps_dir" ]; then
+    echo "Usage:\n\t./renamesyms.sh TARGET DEPS_DIR"
+    exit 1
+fi
+
+symbols_file=`mktemp`
+
+for dep in `find $deps_dir -type f -name "*.rlib"`; do
+    nm --format=posix -g "$dep" 2>/dev/null | sed 's/.*:.*//g' | awk '{if ($2 == "T") print $1}' | sed 's/^\(.*\)$/\1 __relibc_\1/g' >> $symbols_file
+done
+
+sorted_file=`mktemp`
+sort -u "$symbols_file" > "$sorted_file"
+rm -f "$symbols_file"
+
+objcopy --redefine-syms="$sorted_file" "$target"
+
+rm -f "$sorted_file"

+ 16 - 0
src/ld_so/tcb.rs

@@ -168,6 +168,22 @@ impl Tcb {
         Self::os_arch_activate(self.tcb_ptr as usize);
     }
 
+    /// Deactivate TLS, used before exec() on Redox to not trick target executable into thinking TLS
+    /// is already initialized as if it was a thread.
+    ///
+    /// Writes an `EnvRegisters` value whose `fsbase` and `gsbase` are zero to
+    /// `thisproc:current/regs/env`, then closes the handle again.
+    ///
+    /// Failure to open the handle or to write the registers is reported through `expect_notls`.
+    ///
+    /// # Safety
+    ///
+    /// NOTE(review): presumably nothing that depends on the current fs/gs base (for example
+    /// thread-local data) may be used after this call returns -- confirm against the callers.
+    #[cfg(all(target_os = "redox", target_arch = "x86_64"))]
+    pub unsafe fn deactivate() {
+        let file = syscall::open("thisproc:current/regs/env", syscall::O_CLOEXEC | syscall::O_WRONLY)
+            .expect_notls("failed to open handle for process registers");
+
+        let mut env = syscall::EnvRegisters::default();
+        env.fsbase = 0;
+        env.gsbase = 0;
+
+        // Writing only needs a shared view of the register block.
+        let written = syscall::write(file, &env);
+
+        // Close before inspecting the result so the handle is released on both paths.
+        let _ = syscall::close(file);
+
+        let _ = written.expect_notls("failed to write fsbase and gsbase");
+    }
+
     /// Mapping with correct flags for TCB and TLS
     unsafe fn map(size: usize) -> Result<&'static mut [u8]> {
         let ptr = sys_mman::mmap(

+ 271 - 0
src/platform/redox/exec.rs

@@ -0,0 +1,271 @@
+use core::convert::TryFrom;
+
+use alloc::{
+    collections::{btree_map::Entry, BTreeMap},
+    vec::Vec,
+};
+
+use syscall::{
+    data::ExecMemRange,
+    error::{Error, Result, ENOEXEC, ENOMEM},
+    flag::{AT_ENTRY, AT_NULL, AT_PHDR, AT_PHENT, AT_PHNUM, MapFlags},
+};
+
+/// Fill `buf` completely with the bytes stored at byte `offset` of the file behind `fd`.
+///
+/// The descriptor is first repositioned to `offset` (relative to the start of the file), then
+/// read repeatedly until every byte of `buf` has been written.
+///
+/// # Errors
+///
+/// * `ENOEXEC` if `offset` does not fit in an `isize`, or if the file ends before `buf` is full.
+/// * Any error returned by `syscall::lseek` or `syscall::read`.
+fn read_all(fd: usize, offset: u64, buf: &mut [u8]) -> Result<()> {
+    // Both an unrepresentable offset and a failing seek are handed back to the caller; a
+    // malformed or unseekable file must not bring the calling process down with a panic.
+    let offset = isize::try_from(offset).map_err(|_| Error::new(ENOEXEC))?;
+    syscall::lseek(fd, offset, syscall::SEEK_SET)?;
+
+    let mut filled = 0;
+
+    while filled < buf.len() {
+        match syscall::read(fd, &mut buf[filled..])? {
+            // End of file before the buffer is full: the file is shorter than it claims.
+            0 => return Err(Error::new(ENOEXEC)),
+            count => filled += count,
+        }
+    }
+    Ok(())
+}
+
+/// Find room for `size` bytes between two neighbouring regions of `tree`.
+///
+/// `tree` maps the start address of each region to its entry; `entry.size` is the region length.
+/// The regions are visited in ascending address order and the end address of the first region
+/// that is followed by a gap of at least `size` bytes is returned.
+///
+/// Returns `None` when no such gap exists, including when `tree` has fewer than two regions.
+fn find_free_target_addr(tree: &BTreeMap<usize, TreeEntry>, size: usize) -> Option<usize> {
+    // Pair every region with its successor. The space between zero and the first region is
+    // ignored, to avoid null pointers; the space after the last region is ignored as well, since
+    // the stack will always be put at the highest possible address.
+    tree.iter()
+        .zip(tree.iter().skip(1))
+        .find_map(|((cur_address, entry), (next_address, _))| {
+            let end = cur_address.checked_add(entry.size)?;
+            // Overlapping regions give no gap at all; `checked_sub` skips them instead of
+            // underflowing into a huge bogus gap.
+            let gap = next_address.checked_sub(end)?;
+
+            // A gap of exactly `size` bytes is large enough.
+            if gap >= size {
+                Some(end)
+            } else {
+                None
+            }
+        })
+}
+/// One region of the program image that is being assembled, keyed in the tree by the address it
+/// is meant to occupy in the new image.
+struct TreeEntry {
+    size: usize, // always a page-size multiple
+    flags: MapFlags,
+    accessible_addr: *mut u8, // also always a page-size multiple
+}
+impl Drop for TreeEntry {
+    /// Unmaps the region from this process, unless `accessible_addr` has been nulled out.
+    fn drop(&mut self) {
+        if self.accessible_addr.is_null() {
+            return;
+        }
+
+        // An unmap failure cannot be reported from `drop`, so it is deliberately ignored.
+        let _ = unsafe { syscall::funmap(self.accessible_addr as usize, self.size) };
+    }
+}
+
+/// Granularity, in bytes, to which region sizes and addresses are rounded in this module.
+#[cfg(target_arch = "x86_64")]
+const PAGE_SIZE: usize = 4096;
+
+/// Value passed as the file descriptor to `syscall::fmap` for every mapping created here.
+const FD_ANONYMOUS: usize = !0;
+
+/// Load the 64-bit ELF executable open at `fd` and replace the current program with it.
+///
+/// Every `PT_LOAD` segment, a fresh stack, a copy of the program headers and the argument and
+/// environment strings are placed in newly mapped memory of this process. The list of regions,
+/// the entry point and the initial stack pointer are then handed to `syscall::exec`.
+///
+/// * `fd` - descriptor of the executable; it is repositioned and read, but not closed here.
+/// * `path` - written to `thisproc:current/name` right before the exec (best effort).
+/// * `args`, `envs` - argument and environment strings, without NUL terminators.
+/// * `args_envs_size_without_nul` - kept for interface compatibility only; the size of the string
+///   area is computed from `args` and `envs` themselves.
+///
+/// # Errors
+///
+/// * `ENOEXEC` if the file is shorter than its headers claim, the entry point does not fit in a
+///   `usize`, `e_phentsize` differs from the size of `ProgramHeader`, or a segment has
+///   `p_filesz > p_memsz`.
+/// * `ENOMEM` if memory for a segment cannot be mapped or no free target address is found.
+/// * Any other error returned by the underlying syscalls, including `syscall::exec` itself.
+pub fn fexec_impl(fd: usize, path: &[u8], args: &[&[u8]], envs: &[&[u8]], args_envs_size_without_nul: usize) -> Result<usize> {
+    use goblin::elf::header::header64::Header;
+    use goblin::elf64::program_header::{self, ProgramHeader};
+
+    const STACK_TOP: usize = 1 << 47;
+    const STACK_SIZE: usize = 1024 * 1024;
+
+    // The total is derived from the slices that are actually copied below. A caller that pushes
+    // an extra argument without counting it would otherwise make the copy loop index past the
+    // end of the string area and panic.
+    let _ = args_envs_size_without_nul;
+    let total_args_envs_size = args
+        .iter()
+        .chain(envs.iter())
+        .map(|string| string.len() + 1)
+        .sum::<usize>();
+
+    // Here, we do the minimum part of loading an application, which is what the kernel used to do.
+    // We load the executable into memory (albeit at different offsets in this executable), fix
+    // some misalignments, and then execute the SYS_EXEC syscall to replace the program memory
+    // entirely.
+
+    // TODO: setuid/setgid
+    // TODO: Introduce RAII guards to all owned allocations so that no leaks occur in case of
+    // errors.
+
+    // NOTE(review): `header_bytes` only has byte alignment; confirm that `Header::from_bytes`
+    // tolerates that.
+    let mut header_bytes = [0_u8; core::mem::size_of::<Header>()];
+
+    read_all(fd, 0, &mut header_bytes)?;
+
+    let header = Header::from_bytes(&header_bytes);
+
+    let instruction_ptr = usize::try_from(header.e_entry).map_err(|_| Error::new(ENOEXEC))?;
+
+    // The table is reinterpreted as `[ProgramHeader]` below, which is only valid when the file
+    // uses exactly that entry size. Any other value would make the slice cover memory that was
+    // never read from the file, or that lies outside the mapping.
+    if header.e_phentsize as usize != core::mem::size_of::<ProgramHeader>() {
+        return Err(Error::new(ENOEXEC));
+    }
+
+    let mut tree = BTreeMap::<usize, TreeEntry>::new();
+
+    let phdrs_size = (header.e_phnum as usize) * (header.e_phentsize as usize);
+    let phdrs_size_aligned = (phdrs_size + PAGE_SIZE - 1) / PAGE_SIZE * PAGE_SIZE;
+    let phdrs_mem = unsafe { syscall::fmap(FD_ANONYMOUS, &syscall::Map { offset: 0, size: phdrs_size_aligned, address: 0, flags: MapFlags::PROT_WRITE | MapFlags::MAP_PRIVATE })? };
+    read_all(fd, header.e_phoff, unsafe { core::slice::from_raw_parts_mut(phdrs_mem as *mut u8, phdrs_size) })?;
+
+    let phdrs = unsafe { core::slice::from_raw_parts(phdrs_mem as *const ProgramHeader, header.e_phnum as usize) };
+
+    for segment in phdrs {
+        // Only loadable segments contribute memory to the new image.
+        if segment.p_type != program_header::PT_LOAD {
+            continue;
+        }
+
+        let mut flags = syscall::PROT_READ;
+
+        // W ^ X. If it is executable, do not allow it to be writable, even if requested
+        if segment.p_flags & program_header::PF_X == program_header::PF_X {
+            flags |= syscall::PROT_EXEC;
+        } else if segment.p_flags & program_header::PF_W == program_header::PF_W {
+            flags |= syscall::PROT_WRITE;
+        }
+
+        // Split the segment address into its page base and the offset within that page.
+        let voff = segment.p_vaddr as usize % PAGE_SIZE;
+        let vaddr = segment.p_vaddr as usize - voff;
+        let size = (segment.p_memsz as usize + voff + PAGE_SIZE - 1) / PAGE_SIZE * PAGE_SIZE;
+
+        if segment.p_filesz > segment.p_memsz {
+            return Err(Error::new(ENOEXEC));
+        }
+
+        // Reuse the closest region starting at or below `vaddr` if it reaches past `vaddr`;
+        // otherwise map a new region for this segment.
+        let mem = match tree
+            .range_mut(..=vaddr)
+            .next_back()
+            .filter(|(other_vaddr, entry)| **other_vaddr + entry.size > vaddr)
+        {
+            None => unsafe {
+                let mem = syscall::fmap(
+                    FD_ANONYMOUS,
+                    &syscall::Map {
+                        offset: 0,
+                        address: 0,
+                        size,
+                        flags: syscall::PROT_WRITE,
+                    },
+                )
+                .map_err(|_| Error::new(ENOMEM))?
+                    as *mut u8;
+                tree.insert(
+                    vaddr,
+                    TreeEntry {
+                        size,
+                        flags,
+                        accessible_addr: mem,
+                    },
+                );
+                mem
+            },
+            // NOTE(review): the reused region may start below `vaddr` and may end before this
+            // segment does, yet the data is written at `accessible_addr + voff` without
+            // adding `vaddr - other_vaddr` or checking the region size -- confirm whether
+            // such layouts can occur.
+            Some((
+                _,
+                &mut TreeEntry {
+                    flags: ref mut f,
+                    accessible_addr,
+                    ..
+                },
+            )) => {
+                *f |= flags;
+                accessible_addr
+            }
+        };
+        read_all(fd, segment.p_offset, unsafe {
+            core::slice::from_raw_parts_mut(mem.add(voff), segment.p_filesz as usize)
+        })?;
+    }
+
+    // The stack is filled in from 256 bytes below its top.
+    let (stack_base, stack_mem) = unsafe {
+        let stack_base = syscall::fmap(FD_ANONYMOUS, &syscall::Map { offset: 0, size: STACK_SIZE, address: 0, flags: MapFlags::PROT_WRITE | MapFlags::PROT_READ | MapFlags::MAP_PRIVATE })? as *mut u8;
+        let stack_mem = stack_base.add(STACK_SIZE).sub(256);
+
+        (stack_base, stack_mem)
+    };
+
+    tree.insert(STACK_TOP - STACK_SIZE, TreeEntry {
+        size: STACK_SIZE,
+        flags: MapFlags::PROT_READ | MapFlags::PROT_WRITE | MapFlags::MAP_PRIVATE,
+        accessible_addr: stack_base,
+    });
+    let mut stack_mem = stack_mem.cast::<usize>();
+
+    let target_phdr_address = find_free_target_addr(&tree, phdrs_size_aligned).ok_or_else(|| Error::new(ENOMEM))?;
+    tree.insert(target_phdr_address, TreeEntry {
+        size: phdrs_size_aligned,
+        accessible_addr: phdrs_mem as *mut u8,
+        flags: MapFlags::PROT_READ | MapFlags::MAP_PRIVATE,
+    });
+
+    // `sp` is the stack pointer as seen by the new image; `stack_mem` is the same position in
+    // the mapping that is writable from this process. `push` moves both in lockstep.
+    let mut sp = STACK_TOP - 256;
+
+    let mut push = |word: usize| unsafe {
+        sp -= core::mem::size_of::<usize>();
+        stack_mem = stack_mem.sub(1);
+        stack_mem.write(word);
+    };
+
+    // Auxiliary vector. Everything is pushed in reverse, so the terminating AT_NULL pair goes
+    // first and every value is pushed before its key.
+    push(0);
+    push(AT_NULL);
+    push(instruction_ptr);
+    push(AT_ENTRY);
+    push(target_phdr_address);
+    push(AT_PHDR);
+    push(header.e_phnum as usize);
+    push(AT_PHNUM);
+    push(header.e_phentsize as usize);
+    push(AT_PHENT);
+
+    let args_envs_size_aligned = (total_args_envs_size + PAGE_SIZE - 1) / PAGE_SIZE * PAGE_SIZE;
+    let target_args_env_address = find_free_target_addr(&tree, args_envs_size_aligned).ok_or_else(|| Error::new(ENOMEM))?;
+
+    unsafe {
+        let map = syscall::Map {
+            offset: 0,
+            flags: MapFlags::PROT_READ | MapFlags::PROT_WRITE | MapFlags::MAP_PRIVATE,
+            address: 0,
+            size: args_envs_size_aligned,
+        };
+        let ptr = syscall::fmap(FD_ANONYMOUS, &map)? as *mut u8;
+        let args_envs_region = core::slice::from_raw_parts_mut(ptr, total_args_envs_size);
+        let mut offset = 0;
+
+        // Environment pointers first, then argument pointers, each list pushed back to front
+        // after its terminating zero word.
+        for collection in &[envs, args] {
+            push(0);
+
+            for source_slice in collection.iter().rev().copied() {
+                push(target_args_env_address + offset);
+                args_envs_region[offset..offset + source_slice.len()].copy_from_slice(source_slice);
+                // One byte after each string is skipped and never written here.
+                // NOTE(review): presumably it serves as the NUL terminator because the fresh
+                // mapping is zero-filled -- confirm.
+                offset += source_slice.len() + 1;
+            }
+        }
+
+        tree.insert(target_args_env_address, TreeEntry {
+            accessible_addr: ptr,
+            size: args_envs_size_aligned,
+            flags: MapFlags::PROT_READ | MapFlags::MAP_PRIVATE,
+        });
+    }
+    push(args.len());
+
+    let memranges = tree
+        .into_iter()
+        .map(|(address, mut tree_entry)| {
+            // Prevent use-after-free: with a null address the entry's destructor leaves the
+            // mapping alone.
+            let old_address = core::mem::replace(&mut tree_entry.accessible_addr, core::ptr::null_mut()) as usize;
+
+            ExecMemRange {
+                address,
+                size: tree_entry.size,
+                flags: tree_entry.flags.bits(),
+                old_address,
+            }
+        })
+        .collect::<Vec<_>>();
+
+    unsafe { crate::ld_so::tcb::Tcb::deactivate(); }
+
+    // TODO: Restore old name if exec failed?
+    if let Ok(fd) = syscall::open("thisproc:current/name", syscall::O_WRONLY) {
+        let _ = syscall::write(fd, path);
+        let _ = syscall::close(fd);
+    }
+
+    syscall::exec(&memranges, instruction_ptr, sp)?;
+    unreachable!();
+}

+ 57 - 2
src/platform/redox/extra.rs

@@ -1,6 +1,8 @@
 use core::{ptr, slice};
 use core::arch::global_asm;
 
+use syscall::data::CloneInfo;
+
 use crate::platform::{sys::e, types::*};
 
 #[no_mangle]
@@ -50,13 +52,66 @@ pub unsafe extern "C" fn redox_physunmap(virtual_address: *mut c_void) -> c_int
 }
 
 extern "C" {
-    pub fn pte_clone_inner(stack: usize) -> usize;
+    pub fn pte_clone_inner(info: *const CloneInfo) -> usize;
 }
 
+// Assembly body of `pte_clone_inner(info: *const CloneInfo) -> usize`.
+//
+// Issues the SYS_CLONE syscall with the flags in RDI, the `info` pointer in RSI and the size of
+// `CloneInfo` in RDX.
+//
+// * Non-zero result in RAX: returns it unchanged to the caller.
+// * Zero result in RAX: pops seven words off the current stack into RAX, RDI, RSI, RDX, RCX, R8
+//   and R9, calls the address in RAX, and then issues SYS_EXIT with status 0. `ud2` follows in
+//   case that syscall returns.
+//
+// NOTE(review): the popped words are presumably the entry point and its arguments, prepared by
+// the caller on the stack named in `CloneInfo::target_stack` -- confirm.
 #[cfg(target_arch = "x86_64")]
 global_asm!("
     .globl pte_clone_inner
     .type pte_clone_inner, @function
+    .p2align 6",
+    // Parameters: <info_ptr> in RDI
+"pte_clone_inner:
+    mov rax, {SYS_CLONE}
+    mov rsi, rdi
+    mov rdi, {CLONE_FLAGS}
+    mov rdx, {INFO_LEN}",
+    // Call clone(flags, info_ptr, info_len) syscall
+    "syscall
+
+    # Check if child or parent
+    test rax, rax
+    jnz .parent
+
+    # Load registers
+    pop rax
+    pop rdi
+    pop rsi
+    pop rdx
+    pop rcx
+    pop r8
+    pop r9
+
+    # Call entry point
+    call rax
+
+    # Exit
+    mov rax, {SYS_EXIT}
+    xor rdi, rdi
+    syscall
+
+    # Invalid instruction on failure to exit
+    ud2
+
+    # Return PID if parent
+.parent:
+    ret
+    ",
+    SYS_EXIT = const(syscall::SYS_EXIT),
+    SYS_CLONE = const(syscall::SYS_CLONE),
+    CLONE_FLAGS = const(
+        syscall::CLONE_VM.bits()
+            | syscall::CLONE_FS.bits()
+            | syscall::CLONE_FILES.bits()
+            | syscall::CLONE_SIGHAND.bits()
+            | syscall::CLONE_STACK.bits()
+    ),
+    INFO_LEN = const(core::mem::size_of::<CloneInfo>()),
+);
+
+/*global_asm!("
+    .globl pte_clone_inner
+    .type pte_clone_inner, @function
 
 pte_clone_inner:
     # Move the 1st argument `stack` of this function into the second argument to clone.
@@ -107,4 +162,4 @@ pte_clone_inner:
             | syscall::CLONE_STACK.bits()
     ),
     SYS_CLONE = const(syscall::SYS_CLONE),
-);
+);*/

+ 45 - 16
src/platform/redox/mod.rs

@@ -3,7 +3,7 @@ use core::arch::asm;
 
 use syscall::{
     self,
-    data::{Map, Stat as redox_stat, StatVfs as redox_statvfs, TimeSpec as redox_timespec},
+    data::{CloneInfo, Map, Stat as redox_stat, StatVfs as redox_statvfs, TimeSpec as redox_timespec},
     PtraceEvent, Result,
 };
 
@@ -34,6 +34,7 @@ static mut BRK_CUR: *mut c_void = ptr::null_mut();
 static mut BRK_END: *mut c_void = ptr::null_mut();
 
 mod epoll;
+mod exec;
 mod extra;
 mod ptrace;
 mod signal;
@@ -220,7 +221,7 @@ impl Pal for Sys {
             len += 1;
         }
 
-        let mut args: Vec<[usize; 2]> = Vec::with_capacity(len as usize);
+        let mut args: Vec<&[u8]> = Vec::with_capacity(len as usize);
 
         // Read shebang (for example #!/bin/sh)
         let interpreter = {
@@ -301,44 +302,50 @@ impl Pal for Sys {
             // Make sure path is kept alive long enough, and push it to the arguments
             _interpreter_path = Some(cstring);
             let path_ref = _interpreter_path.as_ref().unwrap();
-            args.push([path_ref.as_ptr() as usize, path_ref.to_bytes().len()]);
+            args.push(path_ref.as_bytes());
         } else {
             if file.seek(SeekFrom::Start(0)).is_err() {
                 return -1;
             }
         }
 
+        let mut args_envs_size_without_nul = 0;
+
         // Arguments
-        while !(*argv).is_null() {
-            let arg = *argv;
+        while !argv.read().is_null() {
+            let arg = argv.read();
 
+            // TODO: Optimized strlen?
             let mut len = 0;
-            while *arg.offset(len) != 0 {
+            while arg.add(len).read() != 0 {
                 len += 1;
             }
-            args.push([arg as usize, len as usize]);
+            args.push(core::slice::from_raw_parts(arg as *const u8, len));
+            args_envs_size_without_nul += len;
             argv = argv.offset(1);
         }
 
         // Environment variables
         let mut len = 0;
-        while !(*envp.offset(len)).is_null() {
+        while !envp.add(len).read().is_null() {
             len += 1;
         }
 
-        let mut envs: Vec<[usize; 2]> = Vec::with_capacity(len as usize);
-        while !(*envp).is_null() {
-            let env = *envp;
+        let mut envs: Vec<&[u8]> = Vec::with_capacity(len);
+        while !envp.read().is_null() {
+            let env = envp.read();
 
+            // TODO: Optimized strlen?
             let mut len = 0;
-            while *env.offset(len) != 0 {
+            while env.add(len).read() != 0 {
                 len += 1;
             }
-            envs.push([env as usize, len as usize]);
-            envp = envp.offset(1);
+            envs.push(core::slice::from_raw_parts(env as *const u8, len));
+            args_envs_size_without_nul += len;
+            envp = envp.add(1);
         }
 
-        e(syscall::fexec(*file as usize, &args, &envs)) as c_int
+        e(self::exec::fexec_impl(*file as usize, path.to_bytes(), &args, &envs, args_envs_size_without_nul)) as c_int
     }
 
     fn fchdir(fd: c_int) -> c_int {
@@ -858,7 +865,29 @@ impl Pal for Sys {
 
     #[cfg(target_arch = "x86_64")]
     unsafe fn pte_clone(stack: *mut usize) -> pid_t {
-        e(syscall::Error::demux(extra::pte_clone_inner(stack as usize))) as pid_t
+        let flags = syscall::CLONE_VM
+            | syscall::CLONE_FS
+            | syscall::CLONE_FILES
+            | syscall::CLONE_SIGHAND
+            | syscall::CLONE_STACK;
+        let flags = flags.bits();
+
+        use syscall::{Map, MapFlags};
+
+        const SIGSTACK_SIZE: usize = 1024 * 256;
+
+        // TODO: Put sigstack at high addresses?
+        let target_sigstack = match syscall::fmap(!0, &Map { address: 0, flags: MapFlags::PROT_READ | MapFlags::PROT_WRITE | MapFlags::MAP_PRIVATE, offset: 0, size: SIGSTACK_SIZE }) {
+            Ok(s) => s + SIGSTACK_SIZE,
+            Err(err) => return e(Err(err)) as pid_t,
+        };
+
+        let info = CloneInfo {
+            target_stack: stack as usize,
+            target_sigstack,
+        };
+
+        e(syscall::Error::demux(extra::pte_clone_inner(&info))) as pid_t
     }
 
     fn read(fd: c_int, buf: &mut [u8]) -> ssize_t {

+ 12 - 0
src/start.rs

@@ -112,6 +112,15 @@ fn io_init() {
         stdio::stderr = stdio::default_stderr.get();
     }
 }
+/// Map a signal stack of `SIGSTACK_SIZE` bytes and register it for the current context.
+///
+/// The address one past the end of the mapping is written, in native byte order, to
+/// `thisproc:current/sigstack`.
+///
+/// # Panics
+///
+/// Panics if the mapping cannot be created, or if the handle cannot be opened or written.
+fn setup_sigstack() {
+    use syscall::{Map, MapFlags};
+
+    const SIGSTACK_SIZE: usize = 1024 * 256;
+
+    let request = Map {
+        address: 0,
+        offset: 0,
+        flags: MapFlags::MAP_PRIVATE | MapFlags::PROT_READ | MapFlags::PROT_WRITE,
+        size: SIGSTACK_SIZE,
+    };
+    let base = unsafe { syscall::fmap(!0, &request) }.expect("failed to allocate sigstack");
+    let top = base + SIGSTACK_SIZE;
+
+    let handle = syscall::open("thisproc:current/sigstack", syscall::O_WRONLY | syscall::O_CLOEXEC)
+        .expect("failed to open thisproc:current/sigstack");
+    syscall::write(handle, &top.to_ne_bytes()).expect("failed to write to thisproc:current/sigstack");
+    let _ = syscall::close(handle);
+}
 
 #[inline(never)]
 #[no_mangle]
@@ -156,6 +165,9 @@ pub unsafe extern "C" fn relibc_start(sp: &'static Stack) -> ! {
     platform::inner_environ = copy_string_array(envp, len);
     platform::environ = platform::inner_environ.as_mut_ptr();
 
+    // Setup signal stack, otherwise we cannot handle any signals besides SIG_IGN/SIG_DFL behavior.
+    setup_sigstack();
+
     init_array();
 
     // Run preinit array