
Add control flow information to __rust_probestack (#328)

Tyler Mandry · 5 years ago · parent commit 2566aa663b
2 changed files with 144 additions and 89 deletions:
  1. src/lib.rs (+1, -0)
  2. src/probestack.rs (+143, -89)

src/lib.rs (+1, -0)

@@ -1,6 +1,7 @@
 #![cfg_attr(feature = "compiler-builtins", compiler_builtins)]
 #![feature(abi_unadjusted)]
 #![feature(asm)]
+#![feature(global_asm)]
 #![feature(cfg_target_has_atomic)]
 #![feature(compiler_builtins)]
 #![feature(core_intrinsics)]

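The new feature gate enables `global_asm!`, which emits module-level assembly outside of any Rust function; probestack.rs uses it below so every CFI directive in the routine can be written by hand rather than relying on a `#[naked]` function. A minimal sketch of the era's syntax (the symbol name `trivial_fn` is invented for illustration; the macro took a single string literal, passed to the assembler in AT&T syntax):

    global_asm!(
        "
        .globl trivial_fn
    trivial_fn:
        ret
        "
    );

    extern "C" {
        fn trivial_fn();
    }
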
src/probestack.rs (+143, -89)

@@ -41,95 +41,149 @@
 //! probes on any other architecture like ARM or PowerPC64. LLVM I'm sure would
 //! be more than welcome to accept such a change!
 
-#![cfg(not(windows))] // Windows already has builtins to do this
-
-#[naked]
-#[no_mangle]
-#[cfg(all(target_arch = "x86_64", not(feature = "mangled-names")))]
-pub unsafe extern "C" fn __rust_probestack() {
-    // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
-    // ensuring that if any pages are unmapped we'll make a page fault.
-    //
-    // The ABI here is that the stack frame size is located in `%eax`. Upon
-    // return we're not supposed to modify `%esp` or `%eax`.
-    asm!("
-        pushq  %rbp
-        movq   %rsp, %rbp
-
-        mov    %rax,%r11        // duplicate %rax as we're clobbering %r11
-
-        // Main loop, taken in one page increments. We're decrementing rsp by
-        // a page each time until there's less than a page remaining. We're
-        // guaranteed that this function isn't called unless there's more than a
-        // page needed.
-        //
-        // Note that we're also testing against `8(%rsp)` to account for the 8
-        // bytes pushed on the stack originally with our return address. Using
-        // `8(%rsp)` simulates us testing the stack pointer in the caller's
-        // context.
-
-        // It's usually called when %rax >= 0x1000, but that's not always true.
-        // Dynamic stack allocation, which is needed to implement unsized
-        // rvalues, triggers a stack probe even if %rax < 0x1000.
-        // Thus we have to check %r11 first to avoid a segfault.
-        cmp    $$0x1000,%r11
-        jna    3f
-    2:
-        sub    $$0x1000,%rsp
-        test   %rsp,8(%rsp)
-        sub    $$0x1000,%r11
-        cmp    $$0x1000,%r11
-        ja     2b
-
-    3:
-        // Finish up the last remaining stack space requested, getting the last
-        // bits out of r11
-        sub    %r11,%rsp
-        test   %rsp,8(%rsp)
-
-        // Restore the stack pointer to what it previously was when entering
-        // this function. The caller will readjust the stack pointer after we
-        // return.
-        add    %rax,%rsp
-
-        leave
-        ret
-    " ::: "memory" : "volatile");
-    ::core::intrinsics::unreachable();
+#![cfg(not(feature = "mangled-names"))]
+// Windows already has builtins to do this.
+#![cfg(not(windows))]
+// We only define stack probing for these architectures today.
+#![cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+
+extern "C" {
+    pub fn __rust_probestack();
 }
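With the body moved into `global_asm!`, this `extern` block is what keeps `__rust_probestack` nameable from Rust. A hypothetical use (not part of this commit) would be taking its address to confirm linkage:

    // Hypothetical sanity check: the extern declaration lets Rust code
    // reference the assembly-defined symbol.
    fn probestack_address() -> usize {
        __rust_probestack as usize
    }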
 
-#[naked]
-#[no_mangle]
-#[cfg(all(target_arch = "x86", not(feature = "mangled-names")))]
-pub unsafe extern "C" fn __rust_probestack() {
-    // This is the same as x86_64 above, only translated for 32-bit sizes. Note
-    // that on Unix we're expected to restore everything as it was; this
-    // function basically can't tamper with anything.
-    //
-    // The ABI here is the same as x86_64, except everything is 32-bits large.
-    asm!("
-        push   %ebp
-        mov    %esp, %ebp
-        push   %ecx
-        mov    %eax,%ecx
-
-        cmp    $$0x1000,%ecx
-        jna    3f
-    2:
-        sub    $$0x1000,%esp
-        test   %esp,8(%esp)
-        sub    $$0x1000,%ecx
-        cmp    $$0x1000,%ecx
-        ja     2b
-
-    3:
-        sub    %ecx,%esp
-        test   %esp,8(%esp)
-
-        add    %eax,%esp
-        pop    %ecx
-        leave
-        ret
-    " ::: "memory" : "volatile");
-    ::core::intrinsics::unreachable();
+// A wrapper for our implementation of __rust_probestack, which allows us to
+// keep the assembly inline while controlling all CFI directives in the assembly
+// emitted for the function.
+//
+// This is the ELF version.
+#[cfg(not(target_vendor = "apple"))]
+macro_rules! define_rust_probestack {
+    ($body: expr) => {
+        concat!(
+            "
+            .pushsection .text.__rust_probestack
+            .globl __rust_probestack
+            .type  __rust_probestack, @function
+        __rust_probestack:
+            ",
+            $body,
+            "
+            .size __rust_probestack, . - __rust_probestack
+            .popsection
+            "
+        )
+    };
+}
+
+// Same as above, but for Mach-O.
+#[cfg(target_vendor = "apple")]
+macro_rules! define_rust_probestack {
+    ($body: expr) => {
+        concat!(
+            "
+            .globl ___rust_probestack
+        ___rust_probestack:
+            ",
+            $body
+        )
+    };
 }
+
+// Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
+// ensuring that if any pages are unmapped we'll make a page fault.
+//
+// The ABI here is that the stack frame size is located in `%rax`. Upon
+// return we're not supposed to modify `%rsp` or `%rax`.
+#[cfg(target_arch = "x86_64")]
+global_asm!(define_rust_probestack!(
+    "
+    .cfi_startproc
+    pushq  %rbp
+    .cfi_adjust_cfa_offset 8
+    .cfi_offset %rbp, -16
+    movq   %rsp, %rbp
+    .cfi_def_cfa_register %rbp
+
+    mov    %rax,%r11        // duplicate %rax as we're clobbering %r11
+
+    // Main loop, taken in one-page increments. We're decrementing rsp by
+    // a page each time until there's less than a page remaining. We're
+    // guaranteed that this function isn't called unless there's more than a
+    // page needed.
+    //
+    // Note that we're also testing against `8(%rsp)` to account for the 8
+    // bytes pushed on the stack originally with our return address. Using
+    // `8(%rsp)` simulates us testing the stack pointer in the caller's
+    // context.
+
+    // It's usually called when %rax >= 0x1000, but that's not always true.
+    // Dynamic stack allocation, which is needed to implement unsized
+    // rvalues, triggers a stack probe even if %rax < 0x1000.
+    // Thus we have to check %r11 first to avoid a segfault.
+    cmp    $0x1000,%r11
+    jna    3f
+2:
+    sub    $0x1000,%rsp
+    test   %rsp,8(%rsp)
+    sub    $0x1000,%r11
+    cmp    $0x1000,%r11
+    ja     2b
+
+3:
+    // Finish up the last remaining stack space requested, getting the last
+    // bits out of r11
+    sub    %r11,%rsp
+    test   %rsp,8(%rsp)
+
+    // Restore the stack pointer to what it previously was when entering
+    // this function. The caller will readjust the stack pointer after we
+    // return.
+    add    %rax,%rsp
+
+    leave
+    .cfi_def_cfa_register %rsp
+    .cfi_adjust_cfa_offset -8
+    ret
+    .cfi_endproc
+    "
+));
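For readers tracing the control flow, the loop above can be modeled in plain Rust (a sketch only; the names are invented, and the real routine must stay in assembly because it adjusts %rsp directly and must leave %rax untouched):

    // Hypothetical model of the probe loop: `frame_size` plays the role of
    // %rax/%r11 and `sp` the role of %rsp.
    fn probe_model(frame_size: usize, mut sp: usize) {
        const PAGE: usize = 0x1000;
        let mut remaining = frame_size;
        // Label 2: probe one page at a time while more than a page remains.
        while remaining > PAGE {
            sp -= PAGE;
            touch(sp + 8); // like `test %rsp,8(%rsp)`: faults if unmapped
            remaining -= PAGE;
        }
        // Label 3: probe whatever is left; the real code then adds %rax back.
        sp -= remaining;
        touch(sp + 8);
    }

    fn touch(addr: usize) {
        // Volatile read so the access is not optimized away.
        let _ = unsafe { core::ptr::read_volatile(addr as *const u8) };
    }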
+
+#[cfg(target_arch = "x86")]
+// This is the same as x86_64 above, only translated for 32-bit sizes. Note
+// that on Unix we're expected to restore everything as it was; this
+// function basically can't tamper with anything.
+//
+// The ABI here is the same as x86_64, except everything is 32-bits large.
+global_asm!(define_rust_probestack!(
+    "
+    .cfi_startproc
+    push   %ebp
+    .cfi_adjust_cfa_offset 4
+    .cfi_offset %ebp, -8
+    mov    %esp, %ebp
+    .cfi_def_cfa_register %ebp
+    push   %ecx
+    mov    %eax,%ecx
+
+    cmp    $0x1000,%ecx
+    jna    3f
+2:
+    sub    $0x1000,%esp
+    test   %esp,8(%esp)
+    sub    $0x1000,%ecx
+    cmp    $0x1000,%ecx
+    ja     2b
+
+3:
+    sub    %ecx,%esp
+    test   %esp,8(%esp)
+
+    add    %eax,%esp
+    pop    %ecx
+    leave
+    .cfi_def_cfa_register %esp
+    .cfi_adjust_cfa_offset -4
+    ret
+    .cfi_endproc
+    "
+));
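The `.cfi_*` directives are the point of this commit: they record where the canonical frame address and the saved frame pointer live at each instruction, so unwinders and profilers can walk the stack even while execution is inside `__rust_probestack`. A hedged sketch of Rust code that would exercise the probe (names and sizes invented; the probe call is only emitted on targets where rustc enables stack probing):

    // Hypothetical demonstration: a frame larger than one 4 KiB page makes
    // rustc call __rust_probestack on entry, so a backtrace taken while this
    // function is being entered relies on the CFI added above.
    #[inline(never)]
    fn big_frame() {
        let buf = [0u8; 0x2000]; // two pages, enough to trigger a probe
        core::hint::black_box(&buf);
    }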