Browse Source

Auto merge of #176 - alexcrichton:probestack2, r=alexcrichton

Tweak definition of probestack functions

It looks like the old `__rust_probestack` routine is incompatible with newer
Linux kernels. My best guess for this is that the kernel's auto-growth logic is
failing to trigger, causing what looks like a legitimate segfault to get
delivered. My best guess for why *that's* happening is that the faulting address
is below `%rsp`, whereas previously all faulting stack addresses were above
`%rsp`. The probestack routine does not modify `%rsp` as it's probing the stack,
and presumably newer kernels are interpreting this as a legitimate violation.

This commit tweaks the probestack routine to instead update `%rsp` incrementally
as probing happens. The ABI of the function, however, requires that `%rsp`
isn't changed as part of the function so it's restored at the end to the
previous value.
bors 7 years ago
parent
commit
5e49856003
6 changed files with 51 additions and 36 deletions
  1. 11 8
      build.rs
  2. 1 0
      ci/run.sh
  3. 6 0
      examples/intrinsics.rs
  4. 3 2
      src/float/conv.rs
  5. 27 23
      src/probestack.rs
  6. 3 3
      src/x86_64.rs

+ 11 - 8
build.rs

@@ -21,9 +21,14 @@ fn main() {
     #[cfg(feature = "gen-tests")]
     tests::generate();
 
-    // Build missing intrinsics from compiler-rt C source code
-    #[cfg(feature = "c")]
-    c::compile(&llvm_target);
+    // Build missing intrinsics from compiler-rt C source code. If we're
+    // mangling names though we assume that we're also in test mode so we don't
+    // build anything and we rely on the upstream implementation of compiler-rt
+    // functions
+    if !cfg!(feature = "mangled-names") {
+        #[cfg(feature = "c")]
+        c::compile(&llvm_target);
+    }
 
     // To compile intrinsics.rs for thumb targets, where there is no libc
     if llvm_target[0].starts_with("thumb") {
@@ -4099,11 +4104,9 @@ mod c {
         // also needs to satisfy intrinsics that jemalloc or C in general may
         // need, so include a few more that aren't typically needed by
         // LLVM/Rust.
-        if env::var_os("CARGO_FEATURE_RUSTBUILD").is_some() {
-            sources.extend(&[
-                "ffsdi2.c",
-            ]);
-        }
+        sources.extend(&[
+            "ffsdi2.c",
+        ]);
 
         if target_os != "ios" {
             sources.extend(

+ 1 - 0
ci/run.sh

@@ -93,6 +93,7 @@ for rlib in $(echo $path); do
       uniq -d | \
       grep -v __x86.get_pc_thunk | \
       grep -v __builtin_cl | \
+      grep -v __builtin_ctz | \
       grep 'T __'
 
     if test $? = 0; then

+ 6 - 0
examples/intrinsics.rs

@@ -13,6 +13,8 @@
 #![feature(lang_items)]
 #![feature(start)]
 #![feature(i128_type)]
+#![feature(global_allocator)]
+#![feature(allocator_api)]
 #![cfg_attr(windows, feature(panic_unwind))]
 #![no_std]
 
@@ -22,6 +24,10 @@ extern crate compiler_builtins;
 #[cfg(windows)]
 extern crate panic_unwind;
 
+#[cfg(not(thumb))]
+#[global_allocator]
+static A: alloc_system::System = alloc_system::System;
+
 // NOTE cfg(not(thumbv6m)) means that the operation is not supported on ARMv6-M at all. Not even
 // compiler-rt provides a C/assembly implementation.
 

+ 3 - 2
src/float/conv.rs

@@ -112,8 +112,9 @@ intrinsics! {
         int_to_float!(i, u32, f64)
     }
 
-    #[use_c_shim_if(all(any(target_arch = "x86", target_arch = "x86_64"),
-                        not(windows)))]
+    #[use_c_shim_if(all(not(target_env = "msvc"),
+                        any(target_arch = "x86",
+                            all(not(windows), target_arch = "x86_64"))))]
     #[arm_aeabi_alias = __aeabi_ul2d]
     pub extern "C" fn __floatundidf(i: u64) -> f64 {
         int_to_float!(i, u64, f64)

+ 27 - 23
src/probestack.rs

@@ -44,7 +44,7 @@
 #![cfg(not(windows))] // Windows already has builtins to do this
 
 #[naked]
-#[no_mangle]
+#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
 #[cfg(target_arch = "x86_64")]
 pub unsafe extern fn __rust_probestack() {
     // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
@@ -53,28 +53,33 @@ pub unsafe extern fn __rust_probestack() {
     // The ABI here is that the stack frame size is located in `%eax`. Upon
     // return we're not supposed to modify `%esp` or `%eax`.
     asm!("
-        lea    8(%rsp),%r11     // rsp before calling this routine -> r11
+        mov    %rax,%r11        // duplicate %rax as we're clobbering %r11
 
-        // Main loop, taken in one page increments. We're decrementing r11 by
+        // Main loop, taken in one page increments. We're decrementing rsp by
         // a page each time until there's less than a page remaining. We're
         // guaranteed that this function isn't called unless there's more than a
-        // page needed
+        // page needed.
+        //
+        // Note that we're also testing against `8(%rsp)` to account for the 8
+        // bytes pushed on the stack originally with our return address. Using
+        // `8(%rsp)` simulates us testing the stack pointer in the caller's
+        // context.
     2:
+        sub    $$0x1000,%rsp
+        test   %rsp,8(%rsp)
         sub    $$0x1000,%r11
-        test   %r11,(%r11)
-        sub    $$0x1000,%rax
-        cmp    $$0x1000,%rax
+        cmp    $$0x1000,%r11
         ja     2b
 
         // Finish up the last remaining stack space requested, getting the last
-        // bits out of rax
-        sub    %rax,%r11
-        test   %r11,(%r11)
+        // bits out of r11
+        sub    %r11,%rsp
+        test   %rsp,8(%rsp)
 
-        // We now know that %r11 is (%rsp + 8 - %rax) so to recover rax
-        // we calculate (%rsp + 8) - %r11 which will give us %rax
-        lea    8(%rsp),%rax
-        sub    %r11,%rax
+        // Restore the stack pointer to what it previously was when entering
+        // this function. The caller will readjust the stack pointer after we
+        // return.
+        add    %rax,%rsp
 
         ret
     ");
@@ -82,7 +87,7 @@ pub unsafe extern fn __rust_probestack() {
 }
 
 #[naked]
-#[no_mangle]
+#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
 #[cfg(target_arch = "x86")]
 pub unsafe extern fn __rust_probestack() {
     // This is the same as x86_64 above, only translated for 32-bit sizes. Note
@@ -92,19 +97,18 @@ pub unsafe extern fn __rust_probestack() {
     // The ABI here is the same as x86_64, except everything is 32-bits large.
     asm!("
         push   %ecx
-        lea    8(%esp),%ecx
+        mov    %eax,%ecx
     2:
+        sub    $$0x1000,%esp
+        test   %esp,8(%esp)
         sub    $$0x1000,%ecx
-        test   %ecx,(%ecx)
-        sub    $$0x1000,%eax
-        cmp    $$0x1000,%eax
+        cmp    $$0x1000,%ecx
         ja     2b
 
-        sub    %eax,%ecx
-        test   %ecx,(%ecx)
+        sub    %ecx,%esp
+        test   %esp,8(%esp)
 
-        lea    8(%esp),%eax
-        sub    %ecx,%eax
+        add    %eax,%esp
         pop    %ecx
         ret
     ");

+ 3 - 3
src/x86_64.rs

@@ -10,7 +10,7 @@ use core::intrinsics;
 
 #[cfg(windows)]
 #[naked]
-#[no_mangle]
+#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
 pub unsafe fn ___chkstk_ms() {
     asm!("push   %rcx
           push   %rax
@@ -34,7 +34,7 @@ pub unsafe fn ___chkstk_ms() {
 
 #[cfg(windows)]
 #[naked]
-#[no_mangle]
+#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
 pub unsafe fn __alloca() {
     asm!("mov    %rcx,%rax  // x64 _alloca is a normal function with parameter in rcx
           jmp    ___chkstk  // Jump to ___chkstk since fallthrough may be unreliable");
@@ -43,7 +43,7 @@ pub unsafe fn __alloca() {
 
 #[cfg(windows)]
 #[naked]
-#[no_mangle]
+#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
 pub unsafe fn ___chkstk() {
     asm!("push   %rcx
           cmp    $$0x1000,%rax