probestack.rs

// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! This module defines the `__rust_probestack` intrinsic which is used in the
//! implementation of "stack probes" on certain platforms.
//!
//! The purpose of a stack probe is to provide a static guarantee that if a
//! thread has a guard page then a stack overflow is guaranteed to hit that
//! guard page. If a function did not have a stack probe then there's a risk of
//! having a stack frame *larger* than the guard page, so a function call could
//! skip over the guard page entirely and then later hit the heap or another
//! thread's stack, possibly leading to security vulnerabilities such as [The
//! Stack Clash], for example.
//!
//! [The Stack Clash]: https://blog.qualys.com/securitylabs/2017/06/19/the-stack-clash
//!
//! The `__rust_probestack` function is called in the prologue of functions
//! whose stack size is larger than the guard page, for example larger than
//! 4096 bytes on x86. This function is then responsible for "touching" all
//! pages relevant to the stack to ensure that if any of them is the guard
//! page, we're guaranteed to hit it.
//!
//! The precise ABI for how this function operates is defined by LLVM. There's
//! no real documentation as to what this is, so you'd basically need to read
//! the LLVM source code for reference. Often though the test cases can be
//! illuminating as to the ABI that's generated, or just looking at the output
//! of `llc`.
//!
//! Note that `#[naked]` is typically used here for the stack probe because the
//! ABI corresponds to no actual ABI.
//!
//! Finally it's worth noting that at the time of this writing LLVM only has
//! support for stack probes on x86 and x86_64. There's no support for stack
//! probes on any other architecture like ARM or PowerPC64. I'm sure LLVM would
//! be more than welcome to accept such a change!
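//!
//! As a rough illustration (a hypothetical sketch; the exact threshold and the
//! generated code depend on the target and on LLVM), a function whose locals
//! exceed the guard page gets a probe call inserted into its prologue:
//!
//! ```ignore
//! fn big_frame() -> u8 {
//!     // 64 KiB of locals: larger than the 4096-byte guard page, so the
//!     // compiler emits a call to `__rust_probestack` in the prologue before
//!     // the stack pointer is adjusted for this frame.
//!     let buf = [0u8; 64 * 1024];
//!     buf[buf.len() - 1]
//! }
//! ```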

#![cfg(not(feature = "mangled-names"))]
// Windows already has builtins to do this.
#![cfg(not(windows))]
// We only define stack probing for these architectures today.
#![cfg(any(target_arch = "x86_64", target_arch = "x86"))]

extern "C" {
    pub fn __rust_probestack();
}
// A wrapper for our implementation of __rust_probestack, which allows us to
// keep the assembly inline while controlling all CFI directives in the assembly
// emitted for the function.
//
// This is the ELF version.
#[cfg(not(any(target_vendor = "apple", target_os = "uefi")))]
macro_rules! define_rust_probestack {
    ($body: expr) => {
        concat!(
            "
            .pushsection .text.__rust_probestack
            .globl __rust_probestack
            .type __rust_probestack, @function
            .hidden __rust_probestack
        __rust_probestack:
            ",
            $body,
            "
            .size __rust_probestack, . - __rust_probestack
            .popsection
            "
        )
    };
}
#[cfg(all(target_os = "uefi", target_arch = "x86_64"))]
macro_rules! define_rust_probestack {
    ($body: expr) => {
        concat!(
            "
            .globl __rust_probestack
        __rust_probestack:
            ",
            $body
        )
    };
}
// Same as above, but for Mach-O. Note that the triple underscore
// is deliberate.
#[cfg(target_vendor = "apple")]
macro_rules! define_rust_probestack {
    ($body: expr) => {
        concat!(
            "
            .globl ___rust_probestack
        ___rust_probestack:
            ",
            $body
        )
    };
}
// On the x86 UEFI target the triple underscore is likewise deliberate.
#[cfg(all(target_os = "uefi", target_arch = "x86"))]
macro_rules! define_rust_probestack {
    ($body: expr) => {
        concat!(
            "
            .globl ___rust_probestack
        ___rust_probestack:
            ",
            $body
        )
    };
}
// Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
// ensuring that if any pages are unmapped we'll cause a page fault.
//
// The ABI here is that the stack frame size is located in `%rax`. Upon
// return we're not supposed to modify `%rsp` or `%rax`.
//
// Any changes to this function should be replicated to the SGX version below.
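//
// Roughly speaking (a sketch, not the exact codegen), a caller with a frame
// larger than a page ends up with a prologue along these lines, where the
// final `sub` is performed by the caller only after we return:
//
//     mov  $FRAME_SIZE, %rax     // requested frame size (placeholder name)
//     call __rust_probestack     // touch every page in that range
//     sub  %rax, %rsp            // caller adjusts the stack pointer itself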
#[cfg(all(
    target_arch = "x86_64",
    not(all(target_env = "sgx", target_vendor = "fortanix"))
))]
global_asm!(define_rust_probestack!(
    "
    .cfi_startproc
    pushq  %rbp
    .cfi_adjust_cfa_offset 8
    .cfi_offset %rbp, -16
    movq   %rsp, %rbp
    .cfi_def_cfa_register %rbp

    mov    %rax,%r11        // duplicate %rax as we're clobbering %r11

    // Main loop, taken in one page increments. We're decrementing rsp by
    // a page each time until there's less than a page remaining. We're
    // guaranteed that this function isn't called unless there's more than a
    // page needed.
    //
    // Note that we're also testing against `8(%rsp)` to account for the 8
    // bytes pushed on the stack originally with our return address. Using
    // `8(%rsp)` simulates us testing the stack pointer in the caller's
    // context.

    // It's usually called when %rax >= 0x1000, but that's not always true.
    // Dynamic stack allocation, which is needed to implement unsized
    // rvalues, triggers a stack probe even if %rax < 0x1000.
    // Thus we have to check %r11 first to avoid a segfault.
    cmp    $0x1000,%r11
    jna    3f
2:
    sub    $0x1000,%rsp
    test   %rsp,8(%rsp)
    sub    $0x1000,%r11
    cmp    $0x1000,%r11
    ja     2b
3:
    // Finish up the last remaining stack space requested, getting the last
    // bits out of r11
    sub    %r11,%rsp
    test   %rsp,8(%rsp)

    // Restore the stack pointer to what it previously was when entering
    // this function. The caller will readjust the stack pointer after we
    // return.
    add    %rax,%rsp

    leave
    .cfi_def_cfa_register %rsp
    .cfi_adjust_cfa_offset -8
    ret
    .cfi_endproc
    "
));
// This function is the same as above, except that some instructions are
// [manually patched for LVI].
//
// [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
#[cfg(all(
    target_arch = "x86_64",
    all(target_env = "sgx", target_vendor = "fortanix")
))]
global_asm!(define_rust_probestack!(
    "
    .cfi_startproc
    pushq  %rbp
    .cfi_adjust_cfa_offset 8
    .cfi_offset %rbp, -16
    movq   %rsp, %rbp
    .cfi_def_cfa_register %rbp

    mov    %rax,%r11        // duplicate %rax as we're clobbering %r11

    // Main loop, taken in one page increments. We're decrementing rsp by
    // a page each time until there's less than a page remaining. We're
    // guaranteed that this function isn't called unless there's more than a
    // page needed.
    //
    // Note that we're also testing against `8(%rsp)` to account for the 8
    // bytes pushed on the stack originally with our return address. Using
    // `8(%rsp)` simulates us testing the stack pointer in the caller's
    // context.

    // It's usually called when %rax >= 0x1000, but that's not always true.
    // Dynamic stack allocation, which is needed to implement unsized
    // rvalues, triggers a stack probe even if %rax < 0x1000.
    // Thus we have to check %r11 first to avoid a segfault.
    cmp    $0x1000,%r11
    jna    3f
2:
    sub    $0x1000,%rsp
    test   %rsp,8(%rsp)
    sub    $0x1000,%r11
    cmp    $0x1000,%r11
    ja     2b
3:
    // Finish up the last remaining stack space requested, getting the last
    // bits out of r11
    sub    %r11,%rsp
    test   %rsp,8(%rsp)

    // Restore the stack pointer to what it previously was when entering
    // this function. The caller will readjust the stack pointer after we
    // return.
    add    %rax,%rsp

    leave
    .cfi_def_cfa_register %rsp
    .cfi_adjust_cfa_offset -8
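    // LVI mitigation: instead of a plain `ret`, pop the return address into a
    // register, fence, and jump through that register (see the link above).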
    pop    %r11
    lfence
    jmp    *%r11
    .cfi_endproc
    "
));
#[cfg(all(target_arch = "x86", not(target_os = "uefi")))]
// This is the same as x86_64 above, only translated for 32-bit sizes. Note
// that on Unix we're expected to restore everything as it was, so this
// function basically can't tamper with anything.
//
// The ABI here is the same as x86_64, except everything is 32-bits large.
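//
// As with x86_64, the caller does something along these lines (a sketch, not
// the exact codegen), adjusting %esp itself only after we return:
//
//     mov  $FRAME_SIZE, %eax     // requested frame size (placeholder name)
//     call __rust_probestack
//     sub  %eax, %esp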
global_asm!(define_rust_probestack!(
    "
    .cfi_startproc
    push   %ebp
    .cfi_adjust_cfa_offset 4
    .cfi_offset %ebp, -8
    mov    %esp, %ebp
    .cfi_def_cfa_register %ebp
    push   %ecx
    mov    %eax,%ecx

    cmp    $0x1000,%ecx
    jna    3f
2:
    sub    $0x1000,%esp
    test   %esp,8(%esp)
    sub    $0x1000,%ecx
    cmp    $0x1000,%ecx
    ja     2b
3:
    sub    %ecx,%esp
    test   %esp,8(%esp)

    add    %eax,%esp
    pop    %ecx
    leave
    .cfi_def_cfa_register %esp
    .cfi_adjust_cfa_offset -4
    ret
    .cfi_endproc
    "
));
#[cfg(all(target_arch = "x86", target_os = "uefi"))]
// UEFI is a Windows-like target, so LLVM emits the same _chkstk-style probing
// it does on Windows and expects this probestack function to behave like
// MSVC's _chkstk. That means that on x86 we must subtract %eax from %esp
// ourselves before returning.
//
// REF: Rust commit(74e80468347)
// rust\src\llvm-project\llvm\lib\Target\X86\X86FrameLowering.cpp: 805
// Comments in LLVM:
// MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
// MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
// themselves.
global_asm!(define_rust_probestack!(
    "
    .cfi_startproc
    push   %ebp
    .cfi_adjust_cfa_offset 4
    .cfi_offset %ebp, -8
    mov    %esp, %ebp
    .cfi_def_cfa_register %ebp
    push   %ecx
    push   %edx
    mov    %eax,%ecx

    cmp    $0x1000,%ecx
    jna    3f
2:
    sub    $0x1000,%esp
    test   %esp,8(%esp)
    sub    $0x1000,%ecx
    cmp    $0x1000,%ecx
    ja     2b
3:
    sub    %ecx,%esp
    test   %esp,8(%esp)
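    // Copy the return address (at 4(%ebp)) down to where %esp will point
    // after the final `sub %eax, %esp` below, so that `ret` still finds it
    // once we've adjusted %esp the way MSVC's _chkstk would.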
    mov    4(%ebp),%edx
    mov    %edx, 12(%esp)

    add    %eax,%esp
    pop    %edx
    pop    %ecx
    leave
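    // Unlike the other variants, return with %esp already lowered by %eax:
    // the caller expects _chkstk-style behavior and won't subtract it itself.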
    sub    %eax, %esp
    .cfi_def_cfa_register %esp
    .cfi_adjust_cfa_offset -4
    ret
    .cfi_endproc
    "
));