
Add control flow information to __rust_probestack (#328)

Tyler Mandry · 5 years ago · parent commit 2566aa663b
2 changed files with 144 additions and 89 deletions:
  1. src/lib.rs (+1, -0)
  2. src/probestack.rs (+143, -89)

src/lib.rs (+1, -0)

@@ -1,6 +1,7 @@
 #![cfg_attr(feature = "compiler-builtins", compiler_builtins)]
 #![feature(abi_unadjusted)]
 #![feature(asm)]
+#![feature(global_asm)]
 #![feature(cfg_target_has_atomic)]
 #![feature(compiler_builtins)]
 #![feature(core_intrinsics)]

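The new feature gate enables `global_asm!`, which emits module-level assembly outside of any Rust function; probestack.rs uses it below so every CFI directive in the routine can be written by hand rather than relying on a `#[naked]` function. A minimal sketch of the era's syntax (the symbol name `trivial_fn` is invented for illustration; the macro took a single string literal, passed to the assembler in AT&T syntax):

    global_asm!(
        "
        .globl trivial_fn
    trivial_fn:
        ret
        "
    );

    extern "C" {
        fn trivial_fn();
    }
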
src/probestack.rs (+143, -89)

@@ -41,95 +41,149 @@
 //! probes on any other architecture like ARM or PowerPC64. LLVM I'm sure would
 //! be more than welcome to accept such a change!
 
-#![cfg(not(windows))] // Windows already has builtins to do this
-
-#[naked]
-#[no_mangle]
-#[cfg(all(target_arch = "x86_64", not(feature = "mangled-names")))]
-pub unsafe extern "C" fn __rust_probestack() {
-    // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
-    // ensuring that if any pages are unmapped we'll make a page fault.
-    //
-    // The ABI here is that the stack frame size is located in `%eax`. Upon
-    // return we're not supposed to modify `%esp` or `%eax`.
-    asm!("
-        pushq  %rbp
-        movq   %rsp, %rbp
-
-        mov    %rax,%r11        // duplicate %rax as we're clobbering %r11
-
-        // Main loop, taken in one page increments. We're decrementing rsp by
-        // a page each time until there's less than a page remaining. We're
-        // guaranteed that this function isn't called unless there's more than a
-        // page needed.
-        //
-        // Note that we're also testing against `8(%rsp)` to account for the 8
-        // bytes pushed on the stack originally with our return address. Using
-        // `8(%rsp)` simulates us testing the stack pointer in the caller's
-        // context.
-
-        // It's usually called when %rax >= 0x1000, but that's not always true.
-        // Dynamic stack allocation, which is needed to implement unsized
-        // rvalues, triggers a stack probe even if %rax < 0x1000.
-        // Thus we have to check %r11 first to avoid a segfault.
-        cmp    $$0x1000,%r11
-        jna    3f
-    2:
-        sub    $$0x1000,%rsp
-        test   %rsp,8(%rsp)
-        sub    $$0x1000,%r11
-        cmp    $$0x1000,%r11
-        ja     2b
-
-    3:
-        // Finish up the last remaining stack space requested, getting the last
-        // bits out of r11
-        sub    %r11,%rsp
-        test   %rsp,8(%rsp)
-
-        // Restore the stack pointer to what it previously was when entering
-        // this function. The caller will readjust the stack pointer after we
-        // return.
-        add    %rax,%rsp
-
-        leave
-        ret
-    " ::: "memory" : "volatile");
-    ::core::intrinsics::unreachable();
+#![cfg(not(feature = "mangled-names"))]
+// Windows already has builtins to do this.
+#![cfg(not(windows))]
+// We only define stack probing for these architectures today.
+#![cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+
+extern "C" {
+    pub fn __rust_probestack();
 }
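With the body moved into `global_asm!`, this `extern` block is what keeps `__rust_probestack` nameable from Rust. A hypothetical use (not part of this commit) would be taking its address to confirm linkage:

    // Hypothetical sanity check: the extern declaration lets Rust code
    // reference the assembly-defined symbol.
    fn probestack_address() -> usize {
        __rust_probestack as usize
    }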
 
-#[naked]
-#[no_mangle]
-#[cfg(all(target_arch = "x86", not(feature = "mangled-names")))]
-pub unsafe extern "C" fn __rust_probestack() {
-    // This is the same as x86_64 above, only translated for 32-bit sizes. Note
-    // that on Unix we're expected to restore everything as it was; this
-    // function basically can't tamper with anything.
-    //
-    // The ABI here is the same as x86_64, except everything is 32-bits large.
-    asm!("
-        push   %ebp
-        mov    %esp, %ebp
-        push   %ecx
-        mov    %eax,%ecx
-
-        cmp    $$0x1000,%ecx
-        jna    3f
-    2:
-        sub    $$0x1000,%esp
-        test   %esp,8(%esp)
-        sub    $$0x1000,%ecx
-        cmp    $$0x1000,%ecx
-        ja     2b
-
-    3:
-        sub    %ecx,%esp
-        test   %esp,8(%esp)
-
-        add    %eax,%esp
-        pop    %ecx
-        leave
-        ret
-    " ::: "memory" : "volatile");
-    ::core::intrinsics::unreachable();
+// A wrapper for our implementation of __rust_probestack, which allows us to
+// keep the assembly inline while controlling all CFI directives in the assembly
+// emitted for the function.
+//
+// This is the ELF version.
+#[cfg(not(target_vendor = "apple"))]
+macro_rules! define_rust_probestack {
+    ($body: expr) => {
+        concat!(
+            "
+            .pushsection .text.__rust_probestack
+            .globl __rust_probestack
+            .type  __rust_probestack, @function
+        __rust_probestack:
+            ",
+            $body,
+            "
+            .size __rust_probestack, . - __rust_probestack
+            .popsection
+            "
+        )
+    };
+}
+
+// Same as above, but for Mach-O.
+#[cfg(target_vendor = "apple")]
+macro_rules! define_rust_probestack {
+    ($body: expr) => {
+        concat!(
+            "
+            .globl ___rust_probestack
+        ___rust_probestack:
+            ",
+            $body
+        )
+    };
 }
+
+// Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
+// ensuring that if any pages are unmapped we'll make a page fault.
+//
+// The ABI here is that the stack frame size is located in `%rax`. Upon
+// return we're not supposed to modify `%rsp` or `%rax`.
+#[cfg(target_arch = "x86_64")]
+global_asm!(define_rust_probestack!(
+    "
+    .cfi_startproc
+    pushq  %rbp
+    .cfi_adjust_cfa_offset 8
+    .cfi_offset %rbp, -16
+    movq   %rsp, %rbp
+    .cfi_def_cfa_register %rbp
+
+    mov    %rax,%r11        // duplicate %rax as we're clobbering %r11
+
+    // Main loop, taken in one-page increments. We're decrementing rsp by
+    // a page each time until there's less than a page remaining. We're
+    // guaranteed that this function isn't called unless there's more than a
+    // page needed.
+    //
+    // Note that we're also testing against `8(%rsp)` to account for the 8
+    // bytes pushed on the stack originally with our return address. Using
+    // `8(%rsp)` simulates us testing the stack pointer in the caller's
+    // context.
+
+    // It's usually called when %rax >= 0x1000, but that's not always true.
+    // Dynamic stack allocation, which is needed to implement unsized
+    // rvalues, triggers a stack probe even if %rax < 0x1000.
+    // Thus we have to check %r11 first to avoid a segfault.
+    cmp    $0x1000,%r11
+    jna    3f
+2:
+    sub    $0x1000,%rsp
+    test   %rsp,8(%rsp)
+    sub    $0x1000,%r11
+    cmp    $0x1000,%r11
+    ja     2b
+
+3:
+    // Finish up the last remaining stack space requested, getting the last
+    // bits out of r11
+    sub    %r11,%rsp
+    test   %rsp,8(%rsp)
+
+    // Restore the stack pointer to what it previously was when entering
+    // this function. The caller will readjust the stack pointer after we
+    // return.
+    add    %rax,%rsp
+
+    leave
+    .cfi_def_cfa_register %rsp
+    .cfi_adjust_cfa_offset -8
+    ret
+    .cfi_endproc
+    "
+));
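For readers tracing the control flow, the loop above can be modeled in plain Rust (a sketch only; the names are invented, and the real routine must stay in assembly because it adjusts %rsp directly and must leave %rax untouched):

    // Hypothetical model of the probe loop: `frame_size` plays the role of
    // %rax/%r11 and `sp` the role of %rsp.
    fn probe_model(frame_size: usize, mut sp: usize) {
        const PAGE: usize = 0x1000;
        let mut remaining = frame_size;
        // Label 2: probe one page at a time while more than a page remains.
        while remaining > PAGE {
            sp -= PAGE;
            touch(sp + 8); // like `test %rsp,8(%rsp)`: faults if unmapped
            remaining -= PAGE;
        }
        // Label 3: probe whatever is left; the real code then adds %rax back.
        sp -= remaining;
        touch(sp + 8);
    }

    fn touch(addr: usize) {
        // Volatile read so the access is not optimized away.
        let _ = unsafe { core::ptr::read_volatile(addr as *const u8) };
    }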
+
+#[cfg(target_arch = "x86")]
+// This is the same as x86_64 above, only translated for 32-bit sizes. Note
+// that on Unix we're expected to restore everything as it was; this
+// function basically can't tamper with anything.
+//
+// The ABI here is the same as x86_64, except everything is 32-bits large.
+global_asm!(define_rust_probestack!(
+    "
+    .cfi_startproc
+    push   %ebp
+    .cfi_adjust_cfa_offset 4
+    .cfi_offset %ebp, -8
+    mov    %esp, %ebp
+    .cfi_def_cfa_register %ebp
+    push   %ecx
+    mov    %eax,%ecx
+
+    cmp    $0x1000,%ecx
+    jna    3f
+2:
+    sub    $0x1000,%esp
+    test   %esp,8(%esp)
+    sub    $0x1000,%ecx
+    cmp    $0x1000,%ecx
+    ja     2b
+
+3:
+    sub    %ecx,%esp
+    test   %esp,8(%esp)
+
+    add    %eax,%esp
+    pop    %ecx
+    leave
+    .cfi_def_cfa_register %esp
+    .cfi_adjust_cfa_offset -4
+    ret
+    .cfi_endproc
+    "
+));
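The `.cfi_*` directives are the point of this commit: they record where the canonical frame address and the saved frame pointer live at each instruction, so unwinders and profilers can walk the stack even while execution is inside `__rust_probestack`. A hedged sketch of Rust code that would exercise the probe (names and sizes invented; the probe call is only emitted on targets where rustc enables stack probing):

    // Hypothetical demonstration: a frame larger than one 4 KiB page makes
    // rustc call __rust_probestack on entry, so a backtrace taken while this
    // function is being entered relies on the CFI added above.
    #[inline(never)]
    fn big_frame() {
        let buf = [0u8; 0x2000]; // two pages, enough to trigger a probe
        core::hint::black_box(&buf);
    }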