
Manually patch ret instruction for LVI (#359)

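The fix replaces the bare `ret` at the end of the probe function with a pop of the return address into a scratch register, an `lfence`, and an indirect jump through that register, so the address load cannot be abused for load value injection. A minimal standalone sketch of that return pattern (the symbol name and the use of `core::arch::global_asm!` are illustrative, not taken from this commit):

    // Illustrative only: an LVI-safe return sequence as a self-contained
    // symbol. Pop the return address into %r11, fence so a transiently
    // injected load value cannot steer the jump, then return via an
    // indirect jump instead of a bare `ret`.
    #[cfg(target_arch = "x86_64")]
    core::arch::global_asm!(
        "
        .globl lvi_safe_return_example
        lvi_safe_return_example:
        pop    %r11
        lfence
        jmp    *%r11
        ",
        options(att_syntax)
    );
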
Co-authored-by: Jethro Beekman <jethro@fortanix.com>
jethrogb 4 years ago
commit f853d6d9b7
1 changed file with 69 additions and 1 deletion

+ 69 - 1
src/probestack.rs

@@ -109,7 +109,12 @@ macro_rules! define_rust_probestack {
 //
 // The ABI here is that the stack frame size is located in `%rax`. Upon
 // return we're not supposed to modify `%rsp` or `%rax`.
-#[cfg(target_arch = "x86_64")]
+//
+// Any changes to this function should be replicated to the SGX version below.
+#[cfg(all(
+    target_arch = "x86_64",
+    not(all(target_env = "sgx", target_vendor = "fortanix"))
+))]
 global_asm!(define_rust_probestack!(
     "
     .cfi_startproc
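
For context (not part of the diff): per the ABI comment above, the caller passes the requested frame size in %rax and calls the probe function, which touches every page of the new frame but leaves %rsp and %rax untouched; the caller then performs the actual allocation itself. A hedged sketch of such a caller, with a made-up symbol name and frame size (the real sequence is emitted by rustc/LLVM):

    // Illustrative caller, not part of this commit: probe a 0x3000-byte
    // frame, then allocate it, per the __rust_probestack ABI.
    #[cfg(target_arch = "x86_64")]
    core::arch::global_asm!(
        "
        .globl probestack_caller_example
        probestack_caller_example:
        mov    $0x3000, %rax        // requested frame size, passed in %rax
        call   __rust_probestack    // probes each page; %rsp and %rax preserved
        sub    %rax, %rsp           // the caller performs the real allocation
        add    %rax, %rsp           // undone here so the example returns cleanly
        ret
        ",
        options(att_syntax)
    );
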
@@ -163,6 +168,69 @@ global_asm!(define_rust_probestack!(
     "
 ));
 
+// This function is the same as above, except that some instructions are
+// [manually patched for LVI].
+//
+// [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
+#[cfg(all(
+    target_arch = "x86_64",
+    all(target_env = "sgx", target_vendor = "fortanix")
+))]
+global_asm!(define_rust_probestack!(
+    "
+    .cfi_startproc
+    pushq  %rbp
+    .cfi_adjust_cfa_offset 8
+    .cfi_offset %rbp, -16
+    movq   %rsp, %rbp
+    .cfi_def_cfa_register %rbp
+
+    mov    %rax,%r11        // duplicate %rax as we're clobbering %r11
+
+    // Main loop, taken in one page increments. We're decrementing rsp by
+    // a page each time until there's less than a page remaining. We're
+    // guaranteed that this function isn't called unless there's more than a
+    // page needed.
+    //
+    // Note that we're also testing against `8(%rsp)` to account for the 8
+    // bytes pushed on the stack originally with our return address. Using
+    // `8(%rsp)` simulates us testing the stack pointer in the caller's
+    // context.
+
+    // This function is usually called when %rax >= 0x1000, but that's not
+    // always true: dynamic stack allocation, which is needed to implement
+    // unsized rvalues, triggers a stack probe even when %rax < 0x1000.
+    // Thus we have to check %r11 first to avoid a segfault.
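+    //
+    // For example, with %rax = 0x2800 the loop below runs twice, probing
+    // after each 0x1000-byte step of %rsp, and the code at label 3 then
+    // probes the remaining 0x800 bytes.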
+    cmp    $0x1000,%r11
+    jna    3f
+2:
+    sub    $0x1000,%rsp
+    test   %rsp,8(%rsp)
+    sub    $0x1000,%r11
+    cmp    $0x1000,%r11
+    ja     2b
+
+3:
+    // Finish up the last remaining stack space requested, getting the last
+    // bits out of r11
+    sub    %r11,%rsp
+    test   %rsp,8(%rsp)
+
+    // Restore the stack pointer to what it previously was when entering
+    // this function. The caller will readjust the stack pointer after we
+    // return.
+    add    %rax,%rsp
+
+    leave
+    .cfi_def_cfa_register %rsp
+    .cfi_adjust_cfa_offset -8
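+    // LVI mitigation of the return: pop the return address into %r11,
+    // fence, and jump through the register instead of executing `ret`.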
+    pop %r11
+    lfence
+    jmp *%r11
+    .cfi_endproc
+    "
+));
+
 #[cfg(target_arch = "x86")]
 // This is the same as x86_64 above, only translated for 32-bit sizes. Note
 // that on Unix we're expected to restore everything as it was, this