瀏覽代碼

Implement BRK caching.

Previously, extending the program break cost two syscalls when going
through glibc: one to request the current program break and one to set
the new one. Caching the program break removes the first of these.

This commit prepares for OS memtrimming, which needs a well-defined
thread-safe function.

- Completely remove the libc dependency, because allocation behavior is
  uncertain in non-glibc implementations (the POSIX standard says little
  to nothing about this).
- Move the default OOM handler to shim.
- Give an error message in the default OOM handler.
- Add SBRK benchmark.

This should improve performance of fresh allocations by 30-40%.
ticki 8 年之前
父節點
當前提交
998377c6b4
共有 14 個文件被更改,包括 216 次插入141 次删除
  1. 1 3
      benches/box.rs
  2. 1 3
      benches/mpsc.rs
  3. 11 0
      benches/sbrk.rs
  4. 1 3
      benches/vec.rs
  5. 1 3
      benches/vec_box.rs
  6. 3 4
      shim/Cargo.toml
  7. 41 23
      shim/src/lib.rs
  8. 2 2
      src/allocator.rs
  9. 3 23
      src/block.rs
  10. 1 0
      src/bookkeeper.rs
  11. 129 26
      src/brk.rs
  12. 4 12
      src/fail.rs
  13. 3 2
      src/lib.rs
  14. 15 37
      src/sys.rs

+ 1 - 3
benches/box.rs

@@ -3,10 +3,8 @@
 extern crate ralloc;
 extern crate test;
 
-use test::Bencher;
-
 #[bench]
-fn bench(b: &mut Bencher) {
+fn bench_box(b: &mut test::Bencher) {
     b.iter(|| {
         let _bx1 = Box::new(0xF000D);
         let _bx2 = Box::new(0xF0002);

+ 1 - 3
benches/mpsc.rs

@@ -6,10 +6,8 @@ extern crate test;
 use std::thread;
 use std::sync::mpsc;
 
-use test::Bencher;
-
 #[bench]
-fn bench(b: &mut Bencher) {
+fn bench_mpsc(b: &mut test::Bencher) {
     b.iter(|| {
         let (tx, rx) = mpsc::channel::<Box<u64>>();
         thread::spawn(move || {

+ 11 - 0
benches/sbrk.rs

@@ -0,0 +1,11 @@
+#![feature(test)]
+
+extern crate ralloc;
+extern crate test;
+
+#[bench]
+fn bench_sbrk(b: &mut test::Bencher) {
+    b.iter(|| {
+        ralloc::sbrk(200).unwrap()
+    });
+}

+ 1 - 3
benches/vec.rs

@@ -3,10 +3,8 @@
 extern crate ralloc;
 extern crate test;
 
-use test::Bencher;
-
 #[bench]
-fn bench(b: &mut Bencher) {
+fn bench_vec(b: &mut test::Bencher) {
     b.iter(|| {
         let mut stuff = Vec::with_capacity(10);
 

+ 1 - 3
benches/vec_box.rs

@@ -3,10 +3,8 @@
 extern crate ralloc;
 extern crate test;
 
-use test::Bencher;
-
 #[bench]
-fn bench(b: &mut Bencher) {
+fn bench_vec_box(b: &mut test::Bencher) {
     b.iter(|| {
         let mut stuff = Vec::with_capacity(10);
 

+ 3 - 4
shim/Cargo.toml

@@ -1,7 +1,7 @@
 [package]
 name = "ralloc_shim"
 version = "0.1.1"
-authors = ["Jeremy Soller <jackpot51@gmail.com>", "Ticki"]
+authors = ["Ticki", "Jeremy Soller <jackpot51@gmail.com>"]
 
 [profile.release]
 panic = "abort"
@@ -12,6 +12,5 @@ lto = true
 debug-assertions = false
 codegen-units = 1
 
-[dependencies.libc]
-version = "0.2"
-default-features = false
+[dependencies]
+syscall = "0.2.1"

+ 41 - 23
shim/src/lib.rs

@@ -1,39 +1,61 @@
 //! Symbols and externs that `ralloc` depends on.
 //!
 //! This crate provides implementation/import of these in Linux, BSD, and Mac OS.
+//!
+//! # Important
+//!
+//! You CANNOT use libc library calls, due to no guarantees being made about allocations of the
+//! functions in the POSIX specification. Therefore, we use the system calls directly.
 
-#![feature(linkage)]
+#![feature(linkage, core_intrinsics)]
 #![no_std]
 #![warn(missing_docs)]
 
-extern crate libc;
+#[macro_use]
+extern crate syscall;
 
-pub use libc::sched_yield;
+use core::intrinsics;
 
-extern {
-    /// Change the data segment. See `man sbrk`.
-    pub fn sbrk(ptr: libc::intptr_t) -> *const libc::c_void;
-    /// Write a buffer to a file descriptor.
-    fn write(fd: libc::c_int, buff: *const libc::c_void, size: libc::size_t) -> libc::ssize_t;
+/// Voluntarily give a time slice to the scheduler.
+pub fn sched_yield() -> usize {
+    unsafe { syscall!(SCHED_YIELD) }
+}
+
+/// The default OOM handler.
+#[cold]
+pub fn default_oom_handler() -> ! {
+    // Log some message.
+    log("\x1b[31;1mThe application ran out of memory. Aborting.\n");
+
+    unsafe {
+        intrinsics::abort();
+    }
+}
+
+/// Change the data segment. See `man brk`.
+///
+/// # Note
+///
+/// This is the `brk` **syscall**, not the library function.
+pub unsafe fn brk(ptr: *const u8) -> *const u8 {
+    syscall!(BRK, ptr) as *const u8
 }
 
 /// Write to the log.
 ///
 /// This points to stderr, but could be changed arbitrarily.
-pub fn log(s: &str) -> libc::ssize_t {
-    unsafe { write(2, s.as_ptr() as *const libc::c_void, s.len()) }
+pub fn log(s: &str) -> usize {
+    unsafe { syscall!(WRITE, 2, s.as_ptr(), s.len()) }
 }
 
 /// Thread destructors for Linux.
 #[cfg(target_os = "linux")]
 pub mod thread_destructor {
-    use libc;
-
     extern {
         #[linkage = "extern_weak"]
         static __dso_handle: *mut u8;
         #[linkage = "extern_weak"]
-        static __cxa_thread_atexit_impl: *const libc::c_void;
+        static __cxa_thread_atexit_impl: *const u8;
     }
 
     /// Does this platform support thread destructors?
@@ -58,17 +80,15 @@ pub mod thread_destructor {
         use core::mem;
 
         /// A thread destructor.
-        type Dtor = unsafe extern fn(dtor: unsafe extern fn(*mut u8), arg: *mut u8, dso_handle: *mut u8) -> libc::c_int;
+        type Dtor = unsafe extern fn(dtor: unsafe extern fn(*mut u8), arg: *mut u8, dso_handle: *mut u8) -> i32;
 
-        mem::transmute::<*const libc::c_void, Dtor>(__cxa_thread_atexit_impl)(dtor, t, &__dso_handle as *const _ as *mut _);
+        mem::transmute::<*const u8, Dtor>(__cxa_thread_atexit_impl)(dtor, t, &__dso_handle as *const _ as *mut _);
     }
 }
 
 /// Thread destructors for Mac OS.
 #[cfg(target_os = "macos")]
 pub mod thread_destructor {
-    use libc;
-
     /// Does this platform support thread destructors?
     ///
     /// This will always return true.
@@ -93,21 +113,19 @@ pub mod thread_destructor {
 
 /// Debugging.
 pub mod debug {
-    use libc;
-
     extern {
         /// Valgrind symbol to declare memory undefined.
-        fn valgrind_make_mem_undefined(ptr: *const libc::c_void, size: libc::size_t);
+        fn valgrind_make_mem_undefined(ptr: *const u8, size: usize);
         /// Valgrind symbol to declare memory freed.
-        fn valgrind_freelike_block(ptr: *const libc::c_void, size: libc::size_t);
+        fn valgrind_freelike_block(ptr: *const u8, size: usize);
     }
 
     /// Mark this segment undefined to the debugger.
-    pub fn mark_undefined(ptr: *const libc::c_void, size: libc::size_t) {
+    pub fn mark_undefined(ptr: *const u8, size: usize) {
         unsafe { valgrind_make_mem_undefined(ptr, size) }
     }
     /// Mark this segment free to the debugger.
-    pub fn mark_free(ptr: *const libc::c_void, size: libc::size_t) {
+    pub fn mark_free(ptr: *const u8, size: usize) {
         unsafe { valgrind_freelike_block(ptr, size) }
     }
 }

+ 2 - 2
src/allocator.rs

@@ -118,7 +118,7 @@ impl GlobalAllocator {
     fn init() -> GlobalAllocator {
         // The initial acquired segment.
         let (aligner, initial_segment, excessive) =
-            brk::get(4 * bookkeeper::EXTRA_ELEMENTS * mem::size_of::<Block>(), mem::align_of::<Block>());
+            brk::lock().canonical_brk(4 * bookkeeper::EXTRA_ELEMENTS * mem::size_of::<Block>(), mem::align_of::<Block>());
 
         // Initialize the new allocator.
         let mut res = GlobalAllocator {
@@ -141,7 +141,7 @@ impl Allocator for GlobalAllocator {
     #[inline]
     fn alloc_fresh(&mut self, size: usize, align: usize) -> Block {
         // Obtain what you need.
-        let (alignment_block, res, excessive) = brk::get(size, align);
+        let (alignment_block, res, excessive) = brk::lock().canonical_brk(size, align);
 
         // Add it to the list. This will not change the order, since the pointer is higher than all
         // the previous blocks (BRK extends the data segment). Although, it is worth noting that

+ 3 - 23
src/block.rs

@@ -7,10 +7,10 @@
 
 use prelude::*;
 
-use {sys, fail};
-
 use core::{ptr, cmp, mem, fmt};
 
+use sys;
+
 /// A contiguous memory block.
 ///
 /// This provides a number of guarantees,
@@ -38,25 +38,13 @@ pub struct Block {
 impl Block {
     /// Construct a block from its raw parts (pointer and size).
     #[inline]
-    pub unsafe fn from_raw_parts(ptr: Pointer<u8>, size: usize) ->  Block {
+    pub unsafe fn from_raw_parts(ptr: Pointer<u8>, size: usize) -> Block {
         Block {
             size: size,
             ptr: ptr,
         }
     }
 
-    /// BRK allocate a block.
-    #[inline]
-    #[allow(cast_possible_wrap)]
-    pub fn brk(size: usize) -> Block {
-        Block {
-            size: size,
-            ptr: unsafe {
-                Pointer::new(sys::sbrk(size as isize).unwrap_or_else(|()| fail::oom()))
-            },
-        }.mark_uninitialized()
-    }
-
     /// Create an empty block starting at `ptr`.
     #[inline]
     pub fn empty(ptr: Pointer<u8>) -> Block {
@@ -366,12 +354,4 @@ mod test {
         assert_eq!(*Pointer::from(block.empty_left()) as *const u8, arr.as_ptr());
         assert_eq!(block.empty_right(), block.split(arr.len()).1);
     }
-
-    #[test]
-    fn test_brk_grow_up() {
-        let brk1 = Block::brk(5);
-        let brk2 = Block::brk(100);
-
-        assert!(brk1 < brk2);
-    }
 }

+ 1 - 0
src/bookkeeper.rs

@@ -67,6 +67,7 @@ pub struct Bookkeeper {
     id: usize,
 }
 
+#[allow(len_without_is_empty)]
 impl Bookkeeper {
     /// Create a new bookkeeper with some initial vector.
     pub fn new(vec: Vec<Block>) -> Bookkeeper {

+ 129 - 26
src/brk.rs

@@ -1,10 +1,126 @@
 //! BRK abstractions.
 //!
-//! This module provides safe abstractions over SBRK.
+//! This module provides safe abstractions over BRK.
 
 use prelude::*;
 
-use core::cmp;
+use core::{cmp, ptr};
+use core::convert::TryInto;
+
+use {sync, sys, fail};
+
+/// The BRK mutex.
+///
+/// This is used for avoiding data races in multiple allocator.
+static BRK_MUTEX: Mutex<BrkState> = Mutex::new(BrkState {
+    brk_end: None,
+});
+
+/// A cache of the BRK state.
+///
+/// To avoid keeping asking the OS for information whenever needed, we cache it.
+struct BrkState {
+    /// The program break's end
+    brk_end: Option<Pointer<u8>>,
+}
+
+/// A BRK lock.
+pub struct BrkLock {
+    /// The inner lock.
+    guard: sync::MutexGuard<'static, BrkState>,
+}
+
+impl BrkLock {
+    /// BRK new space.
+    ///
+    /// The first block represents the aligner segment (that is the precursor aligning the middle
+    /// block to `align`), the second one is the result and is of exactly size `size`. The last
+    /// block is the excessive space.
+    ///
+    /// # Failure
+    ///
+    /// This method calls the OOM handler if it is unable to acquire the needed space.
+    // TODO: This method is possibly unsafe.
+    pub fn canonical_brk(&mut self, size: usize, align: usize) -> (Block, Block, Block) {
+        // Calculate the canonical size (extra space is allocated to limit the number of system calls).
+        let brk_size = canonicalize_space(size) + align;
+
+        // Use SBRK to allocate extra data segment. The alignment is used as precursor for our
+        // allocated block. This ensures that it is properly memory aligned to the requested value.
+        // TODO: Audit the casts.
+        let (alignment_block, rest) = unsafe {
+            Block::from_raw_parts(
+                self.sbrk(brk_size.try_into().unwrap()).unwrap_or_else(|()| fail::oom()),
+                brk_size,
+            )
+        }.align(align).unwrap();
+
+        // Split the block to leave the excessive space.
+        let (res, excessive) = rest.split(size);
+
+        // Make some assertions.
+        debug_assert!(res.aligned_to(align), "Alignment failed.");
+        debug_assert!(res.size() + alignment_block.size() + excessive.size() == brk_size, "BRK memory leak.");
+
+        (alignment_block, res, excessive)
+    }
+
+    /// Extend the program break.
+    ///
+    /// # Safety
+    ///
+    /// Due to being able shrink the program break, this method is unsafe.
+    unsafe fn sbrk(&mut self, size: isize) -> Result<Pointer<u8>, ()> {
+        // Calculate the new program break. To avoid making multiple syscalls, we make use of the
+        // state cache.
+        let new_brk = self.guard.brk_end
+            .clone()
+            .unwrap_or_else(current_brk)
+            .offset(size);
+
+        // Break it to me, babe!
+        let old_brk = Pointer::new(sys::brk(*new_brk as *const u8) as *mut u8);
+
+        if new_brk == old_brk && size != 0 {
+            // BRK failed. This syscall is rather weird, but whenever it fails (e.g. OOM) it
+            // returns the old (unchanged) break.
+            Err(())
+        } else {
+            // Update the program break cache.
+            self.guard.brk_end = Some(old_brk.clone());
+
+            // Return the old break.
+            Ok(old_brk)
+        }
+    }
+}
+
+/// Lock the BRK lock to allow manipulating the program break.
+pub fn lock() -> BrkLock {
+    BrkLock {
+        guard: BRK_MUTEX.lock(),
+    }
+}
+
+/// `SBRK` symbol which can coexist with the allocator.
+///
+/// `SBRK`-ing directly (from the `BRK` syscall or libc) might make the state inconsistent. This
+/// function makes sure that's not happening.
+///
+/// With the exception of being able to coexist, it follows the same rules. Refer to the relevant
+/// documentation.
+///
+/// # Failure
+///
+/// On failure the maximum pointer (`!0 as *mut u8`) is returned.
+pub unsafe extern fn sbrk(size: isize) -> *mut u8 {
+    *lock().sbrk(size).unwrap_or_else(|()| Pointer::new(!0 as *mut u8))
+}
+
+/// Get the current program break.
+fn current_brk() -> Pointer<u8> {
+    unsafe { Pointer::new(sys::brk(ptr::null()) as *mut u8) }
+}
 
 /// Canonicalize a BRK request.
 ///
@@ -37,38 +153,25 @@ fn canonicalize_space(min: usize) -> usize {
     res
 }
 
-/// BRK new space.
-///
-/// The first block represents the aligner segment (that is the precursor aligning the middle
-/// block to `align`), the second one is the result and is of exactly size `size`. The last
-/// block is the excessive space.
-pub fn get(size: usize, align: usize) -> (Block, Block, Block) {
-    // Calculate the canonical size (extra space is allocated to limit the number of system calls).
-    let brk_size = canonicalize_space(size) + align;
-
-    // Use SBRK to allocate extra data segment. The alignment is used as precursor for our
-    // allocated block. This ensures that it is properly memory aligned to the requested value.
-    let (alignment_block, rest) = Block::brk(brk_size).align(align).unwrap();
-
-    // Split the block to leave the excessive space.
-    let (res, excessive) = rest.split(size);
-
-    // Make some assertions.
-    debug_assert!(res.aligned_to(align), "Alignment failed.");
-    debug_assert!(res.size() + alignment_block.size() + excessive.size() == brk_size, "BRK memory leak.");
-
-    (alignment_block, res, excessive)
-}
-
 #[cfg(test)]
 mod test {
     use super::*;
 
     #[test]
     fn test_ordered() {
-        let brk = get(20, 1);
+        let brk = lock().canonical_brk(20, 1);
 
         assert!(brk.0 <= brk.1);
         assert!(brk.1 <= brk.2);
     }
+
+    #[test]
+    fn test_brk_grow_up() {
+        unsafe {
+            let brk1 = lock().sbrk(5).unwrap();
+            let brk2 = lock().sbrk(100).unwrap();
+
+            assert!(*brk1 < *brk2);
+        }
+    }
 }

+ 4 - 12
src/fail.rs

@@ -3,29 +3,21 @@
 use prelude::*;
 
 use core::sync::atomic::{self, AtomicPtr};
-use core::{mem, intrinsics};
+use core::mem;
+
+use sys;
 
 #[cfg(feature = "tls")]
 use tls;
 
 /// The global OOM handler.
-static OOM_HANDLER: AtomicPtr<()> = AtomicPtr::new(default_oom_handler as *mut ());
+static OOM_HANDLER: AtomicPtr<()> = AtomicPtr::new(sys::default_oom_handler as *mut ());
 #[cfg(feature = "tls")]
 tls! {
     /// The thread-local OOM handler.
     static THREAD_OOM_HANDLER: MoveCell<Option<fn() -> !>> = MoveCell::new(None);
 }
 
-/// The default OOM handler.
-///
-/// This will simply abort the process.
-#[cold]
-fn default_oom_handler() -> ! {
-    unsafe {
-        intrinsics::abort();
-    }
-}
-
 /// Call the OOM handler.
 ///
 /// This is used one out-of-memory errors, and will never return. Usually, it simply consists

+ 3 - 2
src/lib.rs

@@ -16,7 +16,8 @@
 #![no_std]
 
 #![feature(allocator, const_fn, core_intrinsics, stmt_expr_attributes, drop_types_in_const,
-           nonzero, optin_builtin_traits, type_ascription, question_mark, thread_local, linkage)]
+           nonzero, optin_builtin_traits, type_ascription, question_mark, thread_local, linkage,
+           try_from)]
 #![warn(missing_docs, cast_precision_loss, cast_sign_loss, cast_possible_wrap,
         cast_possible_truncation, filter_map, if_not_else, items_after_statements,
         invalid_upcast_comparisons, mutex_integer, nonminimal_bool, shadow_same, shadow_unrelated,
@@ -52,7 +53,7 @@ mod sys;
 mod vec;
 
 pub use allocator::{alloc, free, realloc, realloc_inplace};
+pub use brk::sbrk;
 pub use fail::set_oom_handler;
-pub use sys::sbrk;
 #[cfg(feature = "tls")]
 pub use fail::set_thread_oom_handler;

+ 15 - 37
src/sys.rs

@@ -1,41 +1,29 @@
 //! System primitives.
+//!
+//! This mostly wraps the `ralloc_shim` crate but provides some additional error handling.
 
 extern crate ralloc_shim as shim;
 
-use prelude::*;
-
 use core::mem;
 
-/// The BRK mutex.
-///
-/// This is used for avoiding data races in multiple allocator.
-static BRK_MUTEX: Mutex<()> = Mutex::new(());
+pub use self::shim::default_oom_handler;
 
-/// Increment data segment of this process by some, _n_, return a pointer to the new data segment
-/// start.
+/// Set the program break.
 ///
-/// This uses the system call BRK as backend.
+/// On success, the new program break is returned. On failure, the old program break is returned.
 ///
 /// # Safety
 ///
-/// This is safe unless you have negative or overflowing `n`.
+/// This is due to being able to invalidate safe addresses as well as breaking invariants for the
+/// [`brk`](../brk).
 #[inline]
-pub unsafe fn sbrk(n: isize) -> Result<*mut u8, ()> {
-    // Lock the BRK mutex.
-    #[cfg(not(feature = "unsafe_no_brk_lock"))]
-    let _guard = BRK_MUTEX.lock();
-
-    let brk = shim::sbrk(n);
-    if brk as usize == !0 {
-        Err(())
-    } else {
-        Ok(brk as *mut u8)
-    }
+pub unsafe fn brk(ptr: *const u8) -> *const u8 {
+    shim::brk(ptr)
 }
 
 /// Cooperatively gives up a timeslice to the OS scheduler.
 pub fn yield_now() {
-    assert_eq!(unsafe { shim::sched_yield() }, 0);
+    assert_eq!(shim::sched_yield(), 0);
 }
 
 /// Register a thread destructor.
@@ -67,33 +55,23 @@ pub fn register_thread_destructor<T>(load: *mut T, dtor: extern fn(*mut T)) -> R
 // TODO: Find a better way to silence the warning than this attribute.
 #[allow(dead_code)]
 pub fn log(s: &str) -> Result<(), ()> {
-    if shim::log(s) == -1 { Err(()) } else { Ok(()) }
+    if shim::log(s) == !0 { Err(()) } else { Ok(()) }
 }
 
 /// Tell the debugger that this segment is free.
 ///
 /// If the `debugger` feature is disabled, this is a NOOP.
+#[inline(always)]
 pub fn mark_free(_ptr: *const u8, _size: usize) {
     #[cfg(feature = "debugger")]
-    shim::debug::mark_free(_ptr as *const _, _size);
+    shim::debug::mark_free(_ptr, _size);
 }
 
 /// Tell the debugger that this segment is unaccessible.
 ///
 /// If the `debugger` feature is disabled, this is a NOOP.
+#[inline(always)]
 pub fn mark_uninitialized(_ptr: *const u8, _size: usize) {
     #[cfg(feature = "debugger")]
-    shim::debug::mark_free(_ptr as *const _, _size);
-}
-
-#[cfg(test)]
-mod test {
-    use super::*;
-
-    #[test]
-    fn test_oom() {
-        unsafe {
-            assert!(sbrk(9999999999999).is_err());
-        }
-    }
+    shim::debug::mark_free(_ptr, _size);
 }