mod.rs 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182
  1. use core::{mem, ptr, result::Result as CoreResult, slice, str};
  2. use core::arch::asm;
  3. use syscall::{
  4. self,
  5. data::{CloneInfo, Map, Stat as redox_stat, StatVfs as redox_statvfs, TimeSpec as redox_timespec},
  6. PtraceEvent, Result,
  7. };
  8. use crate::{
  9. c_str::{CStr, CString},
  10. fs::File,
  11. header::{
  12. dirent::dirent,
  13. errno::{EINVAL, EIO, ENOMEM, EPERM, ERANGE},
  14. fcntl,
  15. string::strlen,
  16. sys_mman::{MAP_ANONYMOUS, PROT_READ, PROT_WRITE},
  17. sys_random,
  18. sys_resource::{rlimit, RLIM_INFINITY},
  19. sys_stat::{stat, S_ISGID, S_ISUID},
  20. sys_statvfs::statvfs,
  21. sys_time::{timeval, timezone},
  22. sys_utsname::{utsname, UTSLENGTH},
  23. sys_wait,
  24. time::timespec,
  25. unistd::{F_OK, R_OK, W_OK, X_OK},
  26. },
  27. io::{self, prelude::*, BufReader, SeekFrom},
  28. };
  29. use super::{errno, types::*, Pal, Read};
  30. static mut BRK_CUR: *mut c_void = ptr::null_mut();
  31. static mut BRK_END: *mut c_void = ptr::null_mut();
  32. mod clone;
  33. mod epoll;
  34. mod exec;
  35. mod extra;
  36. mod ptrace;
  37. mod signal;
  38. mod socket;
  /// Converts a C string expression into a `&str` via `to_str()`.
  ///
  /// When the conversion fails, `errno` is set to `EINVAL` and the *enclosing*
  /// function returns `-1`, so the macro is only usable inside functions whose
  /// return type accepts `-1`.
  macro_rules! path_from_c_str {
      ($c_str:expr) => {{
          let converted = $c_str.to_str();
          if let Ok(path) = converted {
              path
          } else {
              unsafe {
                  errno = EINVAL;
              }
              return -1;
          }
      }};
  }
  52. pub fn e(sys: Result<usize>) -> usize {
  53. match sys {
  54. Ok(ok) => ok,
  55. Err(err) => {
  56. unsafe {
  57. errno = err.errno as c_int;
  58. }
  59. !0
  60. }
  61. }
  62. }
  /// Concatenates every item of `iter` into one byte buffer, appending a NUL
  /// byte after each item (including the last one).
  fn flatten_with_nul<T>(iter: impl IntoIterator<Item = T>) -> Box<[u8]>
  where
      T: AsRef<[u8]>,
  {
      let mut bytes: Vec<u8> = Vec::new();
      iter.into_iter().for_each(|chunk| {
          bytes.extend_from_slice(chunk.as_ref());
          bytes.push(0);
      });
      bytes.into_boxed_slice()
  }
  /// Marker type that carries the `Pal` implementation built on the `syscall` crate.
  pub struct Sys;
  72. impl Pal for Sys {
  73. fn access(path: &CStr, mode: c_int) -> c_int {
  74. let fd = match File::open(path, fcntl::O_PATH | fcntl::O_CLOEXEC) {
  75. Ok(fd) => fd,
  76. Err(_) => return -1,
  77. };
  78. if mode == F_OK {
  79. return 0;
  80. }
  81. let mut stat = syscall::Stat::default();
  82. if e(syscall::fstat(*fd as usize, &mut stat)) == !0 {
  83. return -1;
  84. }
  85. let uid = e(syscall::getuid());
  86. if uid == !0 {
  87. return -1;
  88. }
  89. let gid = e(syscall::getgid());
  90. if gid == !0 {
  91. return -1;
  92. }
  93. let perms = if stat.st_uid as usize == uid {
  94. stat.st_mode >> (3 * 2 & 0o7)
  95. } else if stat.st_gid as usize == gid {
  96. stat.st_mode >> (3 * 1 & 0o7)
  97. } else {
  98. stat.st_mode & 0o7
  99. };
  100. if (mode & R_OK == R_OK && perms & 0o4 != 0o4)
  101. || (mode & W_OK == W_OK && perms & 0o2 != 0o2)
  102. || (mode & X_OK == X_OK && perms & 0o1 != 0o1)
  103. {
  104. unsafe {
  105. errno = EINVAL;
  106. }
  107. return -1;
  108. }
  109. 0
  110. }
  111. fn brk(addr: *mut c_void) -> *mut c_void {
  112. unsafe {
  113. // On first invocation, allocate a buffer for brk
  114. if BRK_CUR.is_null() {
  115. // 4 megabytes of RAM ought to be enough for anybody
  116. const BRK_MAX_SIZE: usize = 4 * 1024 * 1024;
  117. let allocated = Self::mmap(
  118. ptr::null_mut(),
  119. BRK_MAX_SIZE,
  120. PROT_READ | PROT_WRITE,
  121. MAP_ANONYMOUS,
  122. 0,
  123. 0,
  124. );
  125. if allocated == !0 as *mut c_void
  126. /* MAP_FAILED */
  127. {
  128. return !0 as *mut c_void;
  129. }
  130. BRK_CUR = allocated;
  131. BRK_END = (allocated as *mut u8).add(BRK_MAX_SIZE) as *mut c_void;
  132. }
  133. if addr.is_null() {
  134. // Lookup what previous brk() invocations have set the address to
  135. BRK_CUR
  136. } else if BRK_CUR <= addr && addr < BRK_END {
  137. // It's inside buffer, return
  138. BRK_CUR = addr;
  139. addr
  140. } else {
  141. // It was outside of valid range
  142. errno = ENOMEM;
  143. ptr::null_mut()
  144. }
  145. }
  146. }
  147. fn chdir(path: &CStr) -> c_int {
  148. let path = path_from_c_str!(path);
  149. e(syscall::chdir(path)) as c_int
  150. }
  151. fn chmod(path: &CStr, mode: mode_t) -> c_int {
  152. match File::open(path, fcntl::O_PATH | fcntl::O_CLOEXEC) {
  153. Ok(file) => Self::fchmod(*file, mode),
  154. Err(_) => -1,
  155. }
  156. }
  157. fn chown(path: &CStr, owner: uid_t, group: gid_t) -> c_int {
  158. match File::open(path, fcntl::O_PATH | fcntl::O_CLOEXEC) {
  159. Ok(file) => Self::fchown(*file, owner, group),
  160. Err(_) => -1,
  161. }
  162. }
  163. fn clock_gettime(clk_id: clockid_t, tp: *mut timespec) -> c_int {
  164. let mut redox_tp = unsafe { redox_timespec::from(&*tp) };
  165. match e(syscall::clock_gettime(clk_id as usize, &mut redox_tp)) as c_int {
  166. -1 => -1,
  167. _ => {
  168. unsafe {
  169. (*tp).tv_sec = redox_tp.tv_sec;
  170. (*tp).tv_nsec = redox_tp.tv_nsec as i64;
  171. };
  172. 0
  173. }
  174. }
  175. }
  176. fn close(fd: c_int) -> c_int {
  177. e(syscall::close(fd as usize)) as c_int
  178. }
  179. fn dup(fd: c_int) -> c_int {
  180. e(syscall::dup(fd as usize, &[])) as c_int
  181. }
  182. fn dup2(fd1: c_int, fd2: c_int) -> c_int {
  183. e(syscall::dup2(fd1 as usize, fd2 as usize, &[])) as c_int
  184. }
  185. fn exit(status: c_int) -> ! {
  186. let _ = syscall::exit(status as usize);
  187. loop {}
  188. }
  189. unsafe fn execve(
  190. path: &CStr,
  191. mut argv: *const *mut c_char,
  192. mut envp: *const *mut c_char,
  193. ) -> c_int {
  194. // NOTE: We must omit O_CLOEXEC and close manually, otherwise it will be closed before we
  195. // have even read it!
  196. let mut file = match File::open(path, fcntl::O_RDONLY) {
  197. Ok(file) => file,
  198. Err(_) => return -1,
  199. };
  200. let fd = *file as usize;
  201. // With execve now being implemented in userspace, we need to check ourselves that this
  202. // file is actually executable. While checking for read permission is unnecessary as the
  203. // scheme will not allow us to read otherwise, the execute bit is completely unenforced. We
  204. // have the permission to mmap executable memory and fill it with the program even if it is
  205. // unset, so the best we can do is check that nothing is executed by accident.
  206. //
  207. // TODO: At some point we might have capabilities limiting the ability to allocate
  208. // executable memory, and in that case we might use the `escalate:` scheme as we already do
  209. // when the binary needs setuid/setgid.
  210. let mut stat = redox_stat::default();
  211. if e(syscall::fstat(fd, &mut stat)) == !0 {
  212. return -1;
  213. }
  214. let uid = e(syscall::getuid());
  215. if uid == !0 {
  216. return -1;
  217. }
  218. let gid = e(syscall::getuid());
  219. if gid == !0 {
  220. return -1;
  221. }
  222. let mode = if uid == stat.st_uid as usize {
  223. (stat.st_mode >> 3 * 2) & 0o7
  224. } else if gid == stat.st_gid as usize {
  225. (stat.st_mode >> 3 * 1) & 0o7
  226. } else {
  227. stat.st_mode & 0o7
  228. };
  229. if mode & 0o1 == 0o0 {
  230. errno = EPERM;
  231. return -1;
  232. }
  233. let wants_setugid = stat.st_mode & ((S_ISUID | S_ISGID) as u16) != 0;
  234. // Count arguments
  235. let mut len = 0;
  236. while !(*argv.add(len)).is_null() {
  237. len += 1;
  238. }
  239. let mut args: Vec<&[u8]> = Vec::with_capacity(len);
  240. // Read shebang (for example #!/bin/sh)
  241. let mut _interpreter_path = None;
  242. let is_interpreted = {
  243. let mut read = 0;
  244. let mut shebang = [0; 2];
  245. while read < 2 {
  246. match file.read(&mut shebang) {
  247. Ok(0) => break,
  248. Ok(i) => read += i,
  249. Err(_) => return -1,
  250. }
  251. }
  252. shebang == *b"#!"
  253. };
  254. // Since the fexec implementation is almost fully done in userspace, the kernel can no
  255. // longer set UID/GID accordingly, and this code checking for them before using
  256. // hypothetical interfaces to upgrade UID/GID, can not be trusted. So we ask the
  257. // `escalate:` scheme for help. Note that `escalate:` can be deliberately excluded from the
  258. // scheme namespace to deny privilege escalation (such as su/sudo/doas) for untrusted
  259. // processes.
  260. //
  261. // According to execve(2), Linux and most other UNIXes ignore setuid/setgid for interpreted
  262. // executables and thereby simply keep the privileges as is. For compatibility we do that
  263. // too.
  264. if is_interpreted {
  265. // So, this file is interpreted.
  266. // Then, read the actual interpreter:
  267. let mut interpreter = Vec::new();
  268. if BufReader::new(&mut file).read_until(b'\n', &mut interpreter).is_err() {
  269. return -1;
  270. }
  271. if interpreter.ends_with(&[b'\n']) {
  272. interpreter.pop().unwrap();
  273. }
  274. let cstring = match CString::new(interpreter) {
  275. Ok(cstring) => cstring,
  276. Err(_) => return -1,
  277. };
  278. file = match File::open(&cstring, fcntl::O_RDONLY) {
  279. Ok(file) => file,
  280. Err(_) => return -1,
  281. };
  282. // Make sure path is kept alive long enough, and push it to the arguments
  283. _interpreter_path = Some(cstring);
  284. let path_ref = _interpreter_path.as_ref().unwrap();
  285. args.push(path_ref.as_bytes());
  286. } else {
  287. if file.seek(SeekFrom::Start(0)).is_err() {
  288. return -1;
  289. }
  290. }
  291. let mut args_envs_size_without_nul = 0;
  292. // Arguments
  293. while !argv.read().is_null() {
  294. let arg = argv.read();
  295. let len = strlen(arg);
  296. args.push(core::slice::from_raw_parts(arg as *const u8, len));
  297. args_envs_size_without_nul += len;
  298. argv = argv.add(1);
  299. }
  300. // Environment variables
  301. let mut len = 0;
  302. while !envp.add(len).read().is_null() {
  303. len += 1;
  304. }
  305. let mut envs: Vec<&[u8]> = Vec::with_capacity(len);
  306. while !envp.read().is_null() {
  307. let env = envp.read();
  308. let len = strlen(env);
  309. envs.push(core::slice::from_raw_parts(env as *const u8, len));
  310. args_envs_size_without_nul += len;
  311. envp = envp.add(1);
  312. }
  313. // Close all O_CLOEXEC file descriptors. TODO: close_range?
  314. {
  315. // NOTE: This approach of implementing O_CLOEXEC will not work in multithreaded
  316. // scenarios. While execve() is undefined according to POSIX if there exist sibling
  317. // threads, it could still be allowed by keeping certain file descriptors and instead
  318. // set the active file table.
  319. let name = CStr::from_bytes_with_nul(b"thisproc:current/filetable\0").expect("string should be valid");
  320. let files_fd = match File::open(name, fcntl::O_RDONLY) {
  321. Ok(f) => f,
  322. Err(_) => return -1,
  323. };
  324. for line in BufReader::new(files_fd).lines() {
  325. let line = match line {
  326. Ok(l) => l,
  327. Err(_) => break,
  328. };
  329. let fd = match line.parse::<usize>() {
  330. Ok(f) => f,
  331. Err(_) => continue,
  332. };
  333. let flags = Self::fcntl(fd as c_int, fcntl::F_GETFD, 0);
  334. if flags != -1 {
  335. if flags & fcntl::O_CLOEXEC == fcntl::O_CLOEXEC {
  336. let _ = Self::close(fd as c_int);
  337. }
  338. }
  339. }
  340. }
  341. if !is_interpreted && wants_setugid {
  342. // Make sure the last file descriptor not covered by O_CLOEXEC is not leaked.
  343. drop(file);
  344. let name = CStr::from_bytes_with_nul(b"escalate:\0").expect("string should be valid");
  345. // We are now going to invoke `escalate:` rather than loading the program ourselves.
  346. let mut escalate_fd = match File::open(name, fcntl::O_WRONLY) {
  347. Ok(f) => f,
  348. Err(_) => return -1,
  349. };
  350. // First, we write the path.
  351. //
  352. // TODO: For improved security, use a hypothetical SYS_DUP_FORWARD syscall to give the
  353. // scheme our file descriptor. It can check through the kernel-overwritten stat.st_dev
  354. // field that it pertains to a "trusted" scheme (i.e. of at least the privilege the
  355. // new uid/gid has), although for now only root can open schemes. Passing a file
  356. // descriptor and not a path will allow escalated to run in a limited namespace.
  357. //
  358. // TODO: Plus, at this point fexecve is not implemented (but specified in
  359. // POSIX.1-2008), and to avoid bad syscalls such as fpath, passing a file descriptor
  360. // would be better.
  361. if escalate_fd.write_all(path.to_bytes()).is_err() {
  362. return -1;
  363. }
  364. // Second, we write the flattened args and envs with NUL characters separating
  365. // individual items. This can be copied directly into the new executable's memory.
  366. if escalate_fd.write_all(&flatten_with_nul(args)).is_err() {
  367. return -1;
  368. }
  369. if escalate_fd.write_all(&flatten_with_nul(envs)).is_err() {
  370. return -1;
  371. }
  372. // Closing will notify the scheme, and from that point we will no longer have control
  373. // over this process (unless it fails). We do this manually since drop cannot handle
  374. // errors.
  375. let fd = *escalate_fd as usize;
  376. core::mem::forget(escalate_fd);
  377. if let Err(err) = syscall::close(fd) {
  378. return e(Err(err)) as c_int;
  379. }
  380. unreachable!()
  381. } else {
  382. e(self::exec::fexec_impl(file, path.to_bytes(), &args, &envs, args_envs_size_without_nul)) as c_int
  383. }
  384. }
  385. fn fchdir(fd: c_int) -> c_int {
  386. let mut buf = [0; 4096];
  387. let res = e(syscall::fpath(fd as usize, &mut buf));
  388. if res == !0 {
  389. !0
  390. } else {
  391. match str::from_utf8(&buf[..res]) {
  392. Ok(path) => e(syscall::chdir(&path)) as c_int,
  393. Err(_) => {
  394. unsafe { errno = EINVAL };
  395. return -1;
  396. }
  397. }
  398. }
  399. }
  400. fn fchmod(fd: c_int, mode: mode_t) -> c_int {
  401. e(syscall::fchmod(fd as usize, mode as u16)) as c_int
  402. }
  403. fn fchown(fd: c_int, owner: uid_t, group: gid_t) -> c_int {
  404. e(syscall::fchown(fd as usize, owner as u32, group as u32)) as c_int
  405. }
  406. fn fcntl(fd: c_int, cmd: c_int, args: c_int) -> c_int {
  407. e(syscall::fcntl(fd as usize, cmd as usize, args as usize)) as c_int
  408. }
  409. fn flock(_fd: c_int, _operation: c_int) -> c_int {
  410. // TODO: Redox does not have file locking yet
  411. 0
  412. }
  413. fn fork() -> pid_t {
  414. e(clone::fork_impl()) as pid_t
  415. }
  416. fn fstat(fildes: c_int, buf: *mut stat) -> c_int {
  417. let mut redox_buf: redox_stat = redox_stat::default();
  418. match e(syscall::fstat(fildes as usize, &mut redox_buf)) {
  419. 0 => {
  420. if let Some(buf) = unsafe { buf.as_mut() } {
  421. buf.st_dev = redox_buf.st_dev as dev_t;
  422. buf.st_ino = redox_buf.st_ino as ino_t;
  423. buf.st_nlink = redox_buf.st_nlink as nlink_t;
  424. buf.st_mode = redox_buf.st_mode as mode_t;
  425. buf.st_uid = redox_buf.st_uid as uid_t;
  426. buf.st_gid = redox_buf.st_gid as gid_t;
  427. // TODO st_rdev
  428. buf.st_rdev = 0;
  429. buf.st_size = redox_buf.st_size as off_t;
  430. buf.st_blksize = redox_buf.st_blksize as blksize_t;
  431. buf.st_atim = timespec {
  432. tv_sec: redox_buf.st_atime as time_t,
  433. tv_nsec: redox_buf.st_atime_nsec as c_long,
  434. };
  435. buf.st_mtim = timespec {
  436. tv_sec: redox_buf.st_mtime as time_t,
  437. tv_nsec: redox_buf.st_mtime_nsec as c_long,
  438. };
  439. buf.st_ctim = timespec {
  440. tv_sec: redox_buf.st_ctime as time_t,
  441. tv_nsec: redox_buf.st_ctime_nsec as c_long,
  442. };
  443. }
  444. 0
  445. }
  446. _ => -1,
  447. }
  448. }
  449. fn fstatvfs(fildes: c_int, buf: *mut statvfs) -> c_int {
  450. let mut kbuf: redox_statvfs = redox_statvfs::default();
  451. match e(syscall::fstatvfs(fildes as usize, &mut kbuf)) {
  452. 0 => {
  453. unsafe {
  454. if !buf.is_null() {
  455. (*buf).f_bsize = kbuf.f_bsize as c_ulong;
  456. (*buf).f_frsize = kbuf.f_bsize as c_ulong;
  457. (*buf).f_blocks = kbuf.f_blocks;
  458. (*buf).f_bfree = kbuf.f_bfree;
  459. (*buf).f_bavail = kbuf.f_bavail;
  460. //TODO
  461. (*buf).f_files = 0;
  462. (*buf).f_ffree = 0;
  463. (*buf).f_favail = 0;
  464. (*buf).f_fsid = 0;
  465. (*buf).f_flag = 0;
  466. (*buf).f_namemax = 0;
  467. }
  468. }
  469. 0
  470. }
  471. _ => -1,
  472. }
  473. }
  474. fn fsync(fd: c_int) -> c_int {
  475. e(syscall::fsync(fd as usize)) as c_int
  476. }
  477. fn ftruncate(fd: c_int, len: off_t) -> c_int {
  478. e(syscall::ftruncate(fd as usize, len as usize)) as c_int
  479. }
  480. fn futex(addr: *mut c_int, op: c_int, val: c_int, val2: usize) -> c_int {
  481. match unsafe {
  482. syscall::futex(
  483. addr as *mut i32,
  484. op as usize,
  485. val as i32,
  486. val2,
  487. ptr::null_mut(),
  488. )
  489. } {
  490. Ok(success) => success as c_int,
  491. Err(err) => -(err.errno as c_int),
  492. }
  493. }
  494. fn futimens(fd: c_int, times: *const timespec) -> c_int {
  495. let times = [unsafe { redox_timespec::from(&*times) }, unsafe {
  496. redox_timespec::from(&*times.offset(1))
  497. }];
  498. e(syscall::futimens(fd as usize, &times)) as c_int
  499. }
  500. fn utimens(path: &CStr, times: *const timespec) -> c_int {
  501. match File::open(path, fcntl::O_PATH | fcntl::O_CLOEXEC) {
  502. Ok(file) => Self::futimens(*file, times),
  503. Err(_) => -1,
  504. }
  505. }
  506. fn getcwd(buf: *mut c_char, size: size_t) -> *mut c_char {
  507. let buf_slice = unsafe { slice::from_raw_parts_mut(buf as *mut u8, size as usize) };
  508. if !buf_slice.is_empty() {
  509. let nonnull_size = buf_slice.len() - 1;
  510. let read = e(syscall::getcwd(&mut buf_slice[..nonnull_size]));
  511. if read == !0 {
  512. ptr::null_mut()
  513. } else if read == nonnull_size {
  514. unsafe {
  515. errno = ERANGE;
  516. }
  517. ptr::null_mut()
  518. } else {
  519. for b in &mut buf_slice[read..] {
  520. *b = 0;
  521. }
  522. buf
  523. }
  524. } else {
  525. unsafe {
  526. errno = EINVAL;
  527. }
  528. ptr::null_mut()
  529. }
  530. }
  531. fn getdents(fd: c_int, mut dirents: *mut dirent, max_bytes: usize) -> c_int {
  532. //TODO: rewrite this code. Originally the *dirents = dirent { ... } stuff below caused
  533. // massive issues. This has been hacked around, but it still isn't perfect
  534. // Get initial reading position
  535. let mut read = match syscall::lseek(fd as usize, 0, syscall::SEEK_CUR) {
  536. Ok(pos) => pos as isize,
  537. Err(err) => return -err.errno,
  538. };
  539. let mut written = 0;
  540. let mut buf = [0; 1024];
  541. let mut name = [0; 256];
  542. let mut i = 0;
  543. let mut flush = |written: &mut usize, i: &mut usize, name: &mut [c_char; 256]| {
  544. if *i < name.len() {
  545. // Set NUL byte
  546. name[*i] = 0;
  547. }
  548. // Get size: full size - unused bytes
  549. if *written + mem::size_of::<dirent>() > max_bytes {
  550. // Seek back to after last read entry and return
  551. match syscall::lseek(fd as usize, read, syscall::SEEK_SET) {
  552. Ok(_) => return Some(*written as c_int),
  553. Err(err) => return Some(-err.errno),
  554. }
  555. }
  556. let size = mem::size_of::<dirent>() - name.len().saturating_sub(*i + 1);
  557. unsafe {
  558. //This is the offending code mentioned above
  559. *dirents = dirent {
  560. d_ino: 0,
  561. d_off: read as off_t,
  562. d_reclen: size as c_ushort,
  563. d_type: 0,
  564. d_name: *name,
  565. };
  566. dirents = (dirents as *mut u8).offset(size as isize) as *mut dirent;
  567. }
  568. read += *i as isize + /* newline */ 1;
  569. *written += size;
  570. *i = 0;
  571. None
  572. };
  573. loop {
  574. // Read a chunk from the directory
  575. let len = match syscall::read(fd as usize, &mut buf) {
  576. Ok(0) => {
  577. if i > 0 {
  578. if let Some(value) = flush(&mut written, &mut i, &mut name) {
  579. return value;
  580. }
  581. }
  582. return written as c_int;
  583. }
  584. Ok(n) => n,
  585. Err(err) => return -err.errno,
  586. };
  587. // Handle everything
  588. let mut start = 0;
  589. while start < len {
  590. let buf = &buf[start..len];
  591. // Copy everything up until a newline
  592. let newline = buf.iter().position(|&c| c == b'\n');
  593. let pre_len = newline.unwrap_or(buf.len());
  594. let post_len = newline.map(|i| i + 1).unwrap_or(buf.len());
  595. if i < pre_len {
  596. // Reserve space for NUL byte
  597. let name_len = name.len() - 1;
  598. let name = &mut name[i..name_len];
  599. let copy = pre_len.min(name.len());
  600. let buf = unsafe { slice::from_raw_parts(buf.as_ptr() as *const c_char, copy) };
  601. name[..copy].copy_from_slice(buf);
  602. }
  603. i += pre_len;
  604. start += post_len;
  605. // Write the directory entry
  606. if newline.is_some() {
  607. if let Some(value) = flush(&mut written, &mut i, &mut name) {
  608. return value;
  609. }
  610. }
  611. }
  612. }
  613. }
  614. fn getegid() -> gid_t {
  615. e(syscall::getegid()) as gid_t
  616. }
  617. fn geteuid() -> uid_t {
  618. e(syscall::geteuid()) as uid_t
  619. }
  620. fn getgid() -> gid_t {
  621. e(syscall::getgid()) as gid_t
  622. }
  /// Returns the page size, hard-coded to 4096 bytes.
  fn getpagesize() -> usize {
      const PAGE_SIZE: usize = 4096;
      PAGE_SIZE
  }
  626. fn getpgid(pid: pid_t) -> pid_t {
  627. e(syscall::getpgid(pid as usize)) as pid_t
  628. }
  629. fn getpid() -> pid_t {
  630. e(syscall::getpid()) as pid_t
  631. }
  632. fn getppid() -> pid_t {
  633. e(syscall::getppid()) as pid_t
  634. }
  635. fn getrandom(buf: &mut [u8], flags: c_uint) -> ssize_t {
  636. //TODO: make this a system call?
  637. let path = if flags & sys_random::GRND_RANDOM != 0 {
  638. //TODO: /dev/random equivalent
  639. "rand:"
  640. } else {
  641. "rand:"
  642. };
  643. let mut open_flags = syscall::O_RDONLY | syscall::O_CLOEXEC;
  644. if flags & sys_random::GRND_NONBLOCK != 0 {
  645. open_flags |= syscall::O_NONBLOCK;
  646. }
  647. let fd = e(syscall::open(path, open_flags));
  648. if fd == !0 {
  649. return -1;
  650. }
  651. let res = e(syscall::read(fd, buf)) as ssize_t;
  652. let _ = syscall::close(fd);
  653. res
  654. }
  655. unsafe fn getrlimit(resource: c_int, rlim: *mut rlimit) -> c_int {
  656. //TODO
  657. if !rlim.is_null() {
  658. (*rlim).rlim_cur = RLIM_INFINITY;
  659. (*rlim).rlim_max = RLIM_INFINITY;
  660. }
  661. 0
  662. }
  663. fn gettid() -> pid_t {
  664. //TODO
  665. Self::getpid()
  666. }
  667. fn gettimeofday(tp: *mut timeval, tzp: *mut timezone) -> c_int {
  668. let mut redox_tp = redox_timespec::default();
  669. let err = e(syscall::clock_gettime(
  670. syscall::CLOCK_REALTIME,
  671. &mut redox_tp,
  672. )) as c_int;
  673. if err < 0 {
  674. return err;
  675. }
  676. unsafe {
  677. (*tp).tv_sec = redox_tp.tv_sec as time_t;
  678. (*tp).tv_usec = (redox_tp.tv_nsec / 1000) as suseconds_t;
  679. if !tzp.is_null() {
  680. (*tzp).tz_minuteswest = 0;
  681. (*tzp).tz_dsttime = 0;
  682. }
  683. }
  684. 0
  685. }
  686. fn getuid() -> uid_t {
  687. e(syscall::getuid()) as pid_t
  688. }
  689. fn lchown(path: &CStr, owner: uid_t, group: gid_t) -> c_int {
  690. // TODO: Is it correct for regular chown to use O_PATH? On Linux the meaning of that flag
  691. // is to forbid file operations, including fchown.
  692. // unlike chown, never follow symbolic links
  693. match File::open(path, fcntl::O_CLOEXEC | fcntl::O_NOFOLLOW) {
  694. Ok(file) => Self::fchown(*file, owner, group),
  695. Err(_) => -1,
  696. }
  697. }
  698. fn link(path1: &CStr, path2: &CStr) -> c_int {
  699. e(unsafe { syscall::link(path1.as_ptr() as *const u8, path2.as_ptr() as *const u8) })
  700. as c_int
  701. }
  702. fn lseek(fd: c_int, offset: off_t, whence: c_int) -> off_t {
  703. e(syscall::lseek(
  704. fd as usize,
  705. offset as isize,
  706. whence as usize,
  707. )) as off_t
  708. }
  709. fn mkdir(path: &CStr, mode: mode_t) -> c_int {
  710. match File::create(
  711. path,
  712. fcntl::O_DIRECTORY | fcntl::O_EXCL | fcntl::O_CLOEXEC,
  713. 0o777,
  714. ) {
  715. Ok(_fd) => 0,
  716. Err(_) => -1,
  717. }
  718. }
  719. fn mkfifo(path: &CStr, mode: mode_t) -> c_int {
  720. match File::create(
  721. path,
  722. fcntl::O_CREAT | fcntl::O_CLOEXEC,
  723. syscall::MODE_FIFO as mode_t | (mode & 0o777),
  724. ) {
  725. Ok(fd) => 0,
  726. Err(_) => -1,
  727. }
  728. }
  729. unsafe fn mlock(addr: *const c_void, len: usize) -> c_int {
  730. // Redox never swaps
  731. 0
  732. }
  733. fn mlockall(flags: c_int) -> c_int {
  734. // Redox never swaps
  735. 0
  736. }
  737. unsafe fn mmap(
  738. addr: *mut c_void,
  739. len: usize,
  740. prot: c_int,
  741. flags: c_int,
  742. fildes: c_int,
  743. off: off_t,
  744. ) -> *mut c_void {
  745. let map = Map {
  746. offset: off as usize,
  747. size: len,
  748. flags: syscall::MapFlags::from_bits_truncate(
  749. ((prot as usize) << 16) | ((flags as usize) & 0xFFFF),
  750. ),
  751. address: addr as usize,
  752. };
  753. if flags & MAP_ANONYMOUS == MAP_ANONYMOUS {
  754. e(syscall::fmap(!0, &map)) as *mut c_void
  755. } else {
  756. e(syscall::fmap(fildes as usize, &map)) as *mut c_void
  757. }
  758. }
  759. unsafe fn mprotect(addr: *mut c_void, len: usize, prot: c_int) -> c_int {
  760. e(syscall::mprotect(
  761. addr as usize,
  762. len,
  763. syscall::MapFlags::from_bits((prot as usize) << 16)
  764. .expect("mprotect: invalid bit pattern"),
  765. )) as c_int
  766. }
  767. unsafe fn msync(addr: *mut c_void, len: usize, flags: c_int) -> c_int {
  768. eprintln!("msync {:p} {:x} {:x}", addr, len, flags);
  769. e(Err(syscall::Error::new(syscall::ENOSYS))) as c_int
  770. /* TODO
  771. e(syscall::msync(
  772. addr as usize,
  773. len,
  774. flags
  775. )) as c_int
  776. */
  777. }
  778. unsafe fn munlock(addr: *const c_void, len: usize) -> c_int {
  779. // Redox never swaps
  780. 0
  781. }
  782. fn munlockall() -> c_int {
  783. // Redox never swaps
  784. 0
  785. }
  786. unsafe fn munmap(addr: *mut c_void, len: usize) -> c_int {
  787. if e(syscall::funmap(addr as usize, len)) == !0 {
  788. return !0;
  789. }
  790. 0
  791. }
  792. fn nanosleep(rqtp: *const timespec, rmtp: *mut timespec) -> c_int {
  793. let redox_rqtp = unsafe { redox_timespec::from(&*rqtp) };
  794. let mut redox_rmtp: redox_timespec;
  795. if rmtp.is_null() {
  796. redox_rmtp = redox_timespec::default();
  797. } else {
  798. redox_rmtp = unsafe { redox_timespec::from(&*rmtp) };
  799. }
  800. match e(syscall::nanosleep(&redox_rqtp, &mut redox_rmtp)) as c_int {
  801. -1 => -1,
  802. _ => {
  803. unsafe {
  804. if !rmtp.is_null() {
  805. (*rmtp).tv_sec = redox_rmtp.tv_sec;
  806. (*rmtp).tv_nsec = redox_rmtp.tv_nsec as i64;
  807. }
  808. }
  809. 0
  810. }
  811. }
  812. }
  813. fn open(path: &CStr, oflag: c_int, mode: mode_t) -> c_int {
  814. let path = path_from_c_str!(path);
  815. e(syscall::open(
  816. path,
  817. ((oflag as usize) & 0xFFFF_0000) | ((mode as usize) & 0xFFFF),
  818. )) as c_int
  819. }
  820. fn pipe2(fds: &mut [c_int], flags: c_int) -> c_int {
  821. let mut usize_fds: [usize; 2] = [0; 2];
  822. let res = e(syscall::pipe2(&mut usize_fds, flags as usize));
  823. fds[0] = usize_fds[0] as c_int;
  824. fds[1] = usize_fds[1] as c_int;
  825. res as c_int
  826. }
  /// aarch64 variant of `pte_clone`: not implemented, always panics.
  #[cfg(target_arch = "aarch64")]
  unsafe fn pte_clone(stack: *mut usize) -> pid_t {
      //TODO: aarch64
      unimplemented!("pte_clone not implemented on aarch64")
  }
  /// x86 variant of `pte_clone`: not implemented, always panics.
  #[cfg(target_arch = "x86")]
  unsafe fn pte_clone(stack: *mut usize) -> pid_t {
      //TODO: x86
      unimplemented!("pte_clone not implemented on x86")
  }
  837. #[cfg(target_arch = "x86_64")]
  838. unsafe fn pte_clone(stack: *mut usize) -> pid_t {
  839. e(clone::pte_clone_impl(stack)) as pid_t
  840. }
  841. fn read(fd: c_int, buf: &mut [u8]) -> ssize_t {
  842. e(syscall::read(fd as usize, buf)) as ssize_t
  843. }
  844. fn fpath(fildes: c_int, out: &mut [u8]) -> ssize_t {
  845. e(syscall::fpath(fildes as usize, out)) as ssize_t
  846. }
  847. fn readlink(pathname: &CStr, out: &mut [u8]) -> ssize_t {
  848. match File::open(pathname, fcntl::O_RDONLY | fcntl::O_SYMLINK | fcntl::O_CLOEXEC) {
  849. Ok(file) => Self::read(*file, out),
  850. Err(_) => return -1,
  851. }
  852. }
  853. fn rename(oldpath: &CStr, newpath: &CStr) -> c_int {
  854. let newpath = path_from_c_str!(newpath);
  855. match File::open(oldpath, fcntl::O_PATH | fcntl::O_CLOEXEC) {
  856. Ok(file) => e(syscall::frename(*file as usize, newpath)) as c_int,
  857. Err(_) => -1,
  858. }
  859. }
  860. fn rmdir(path: &CStr) -> c_int {
  861. let path = path_from_c_str!(path);
  862. e(syscall::rmdir(path)) as c_int
  863. }
  864. fn sched_yield() -> c_int {
  865. e(syscall::sched_yield()) as c_int
  866. }
  867. fn setpgid(pid: pid_t, pgid: pid_t) -> c_int {
  868. e(syscall::setpgid(pid as usize, pgid as usize)) as c_int
  869. }
  870. fn setregid(rgid: gid_t, egid: gid_t) -> c_int {
  871. e(syscall::setregid(rgid as usize, egid as usize)) as c_int
  872. }
  873. fn setreuid(ruid: uid_t, euid: uid_t) -> c_int {
  874. e(syscall::setreuid(ruid as usize, euid as usize)) as c_int
  875. }
  876. fn symlink(path1: &CStr, path2: &CStr) -> c_int {
  877. let mut file = match File::create(
  878. path2,
  879. fcntl::O_WRONLY | fcntl::O_SYMLINK | fcntl::O_CLOEXEC,
  880. 0o777,
  881. ) {
  882. Ok(ok) => ok,
  883. Err(_) => return -1,
  884. };
  885. if file.write(path1.to_bytes()).is_err() {
  886. return -1;
  887. }
  888. 0
  889. }
  890. fn umask(mask: mode_t) -> mode_t {
  891. e(syscall::umask(mask as usize)) as mode_t
  892. }
  893. fn uname(utsname: *mut utsname) -> c_int {
  894. fn gethostname(name: &mut [u8]) -> io::Result<()> {
  895. if name.is_empty() {
  896. return Ok(());
  897. }
  898. let mut file = File::open(
  899. &CString::new("/etc/hostname").unwrap(),
  900. fcntl::O_RDONLY | fcntl::O_CLOEXEC,
  901. )?;
  902. let mut read = 0;
  903. let name_len = name.len();
  904. loop {
  905. match file.read(&mut name[read..name_len - 1])? {
  906. 0 => break,
  907. n => read += n,
  908. }
  909. }
  910. name[read] = 0;
  911. Ok(())
  912. }
  913. fn inner(utsname: *mut utsname) -> CoreResult<(), i32> {
  914. match gethostname(unsafe {
  915. slice::from_raw_parts_mut(
  916. (*utsname).nodename.as_mut_ptr() as *mut u8,
  917. (*utsname).nodename.len(),
  918. )
  919. }) {
  920. Ok(_) => (),
  921. Err(_) => return Err(EIO),
  922. }
  923. let file_path = c_str!("sys:uname");
  924. let mut file = match File::open(file_path, fcntl::O_RDONLY | fcntl::O_CLOEXEC) {
  925. Ok(ok) => ok,
  926. Err(_) => return Err(EIO),
  927. };
  928. let mut lines = BufReader::new(&mut file).lines();
  929. let mut read_line = |dst: &mut [c_char]| {
  930. let line = match lines.next() {
  931. Some(Ok(l)) => match CString::new(l) {
  932. Ok(l) => l,
  933. Err(_) => return Err(EIO),
  934. },
  935. None | Some(Err(_)) => return Err(EIO),
  936. };
  937. let line_slice: &[c_char] = unsafe { mem::transmute(line.as_bytes_with_nul()) };
  938. if line_slice.len() <= UTSLENGTH {
  939. dst[..line_slice.len()].copy_from_slice(line_slice);
  940. Ok(())
  941. } else {
  942. Err(EIO)
  943. }
  944. };
  945. unsafe {
  946. read_line(&mut (*utsname).sysname)?;
  947. read_line(&mut (*utsname).release)?;
  948. read_line(&mut (*utsname).machine)?;
  949. // Version is not provided
  950. ptr::write_bytes((*utsname).version.as_mut_ptr(), 0, UTSLENGTH);
  951. // Redox doesn't provide domainname in sys:uname
  952. //read_line(&mut (*utsname).domainname)?;
  953. ptr::write_bytes((*utsname).domainname.as_mut_ptr(), 0, UTSLENGTH);
  954. }
  955. Ok(())
  956. }
  957. match inner(utsname) {
  958. Ok(()) => 0,
  959. Err(err) => unsafe {
  960. errno = err;
  961. -1
  962. },
  963. }
  964. }
  965. fn unlink(path: &CStr) -> c_int {
  966. let path = path_from_c_str!(path);
  967. e(syscall::unlink(path)) as c_int
  968. }
  969. fn waitpid(mut pid: pid_t, stat_loc: *mut c_int, options: c_int) -> pid_t {
  970. if pid == !0 {
  971. pid = 0;
  972. }
  973. let mut res = None;
  974. let mut status = 0;
  975. let inner = |status: &mut usize, flags| {
  976. syscall::waitpid(
  977. pid as usize,
  978. status,
  979. syscall::WaitFlags::from_bits(flags as usize)
  980. .expect("waitpid: invalid bit pattern"),
  981. )
  982. };
  983. // First, allow ptrace to handle waitpid
  984. // TODO: Handle special PIDs here (such as -1)
  985. let state = ptrace::init_state();
  986. let mut sessions = state.sessions.lock();
  987. if let Ok(session) = ptrace::get_session(&mut sessions, pid) {
  988. if options & sys_wait::WNOHANG != sys_wait::WNOHANG {
  989. let mut _event = PtraceEvent::default();
  990. let _ = (&mut &session.tracer).read(&mut _event);
  991. res = Some(e(inner(
  992. &mut status,
  993. options | sys_wait::WNOHANG | sys_wait::WUNTRACED,
  994. )));
  995. if res == Some(0) {
  996. // WNOHANG, just pretend ptrace SIGSTOP:ped this
  997. status = (syscall::SIGSTOP << 8) | 0x7f;
  998. assert!(syscall::wifstopped(status));
  999. assert_eq!(syscall::wstopsig(status), syscall::SIGSTOP);
  1000. res = Some(pid as usize);
  1001. }
  1002. }
  1003. }
  1004. // If ptrace didn't impact this waitpid, proceed *almost* as
  1005. // normal: We still need to add WUNTRACED, but we only return
  1006. // it if (and only if) a ptrace traceme was activated during
  1007. // the wait.
  1008. let res = res.unwrap_or_else(|| loop {
  1009. let res = e(inner(&mut status, options | sys_wait::WUNTRACED));
  1010. // TODO: Also handle special PIDs here
  1011. if !syscall::wifstopped(res) || ptrace::is_traceme(pid) {
  1012. break res;
  1013. }
  1014. });
  1015. // If stat_loc is non-null, set that and the return
  1016. unsafe {
  1017. if !stat_loc.is_null() {
  1018. *stat_loc = status as c_int;
  1019. }
  1020. }
  1021. res as pid_t
  1022. }
  1023. fn write(fd: c_int, buf: &[u8]) -> ssize_t {
  1024. e(syscall::write(fd as usize, buf)) as ssize_t
  1025. }
  1026. fn verify() -> bool {
  1027. // GETPID on Redox is 20, which is WRITEV on Linux
  1028. e(unsafe { syscall::syscall5(syscall::number::SYS_GETPID, !0, !0, !0, !0, !0) }) != !0
  1029. }
  1030. }