// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! This module defines the `__rust_probestack` intrinsic which is used in the
//! implementation of "stack probes" on certain platforms.
//!
//! The purpose of a stack probe is to provide a static guarantee that if a
//! thread has a guard page then a stack overflow is guaranteed to hit that
//! guard page. If a function did not have a stack probe then there's a risk of
//! having a stack frame *larger* than the guard page, so a function call could
//! skip over the guard page entirely and then later hit the heap or another
//! thread's stack, possibly leading to security vulnerabilities such as [The
//! Stack Clash], for example.
//!
//! [The Stack Clash]: https://blog.qualys.com/securitylabs/2017/06/19/the-stack-clash
//!
//! The `__rust_probestack` function is called in the prologue of functions
//! whose stack size is larger than the guard page, for example larger than
//! 4096 bytes on x86. This function is then responsible for "touching" all
//! pages relevant to the stack to ensure that if any of them is the guard
//! page we're guaranteed to hit it.
//!
//! The precise ABI for how this function operates is defined by LLVM. There's
//! no real documentation as to what this is, so you'd basically need to read
//! the LLVM source code for reference. Often, though, the test cases can be
//! illuminating as to the ABI that's generated, as can simply looking at the
//! output of `llc`.
//!
//! Note that `#[naked]` is typically used here for the stack probe because its
//! calling convention doesn't correspond to any real ABI.
//!
//! Finally it's worth noting that, at the time of this writing, LLVM only has
//! support for stack probes on x86 and x86_64. There's no support for stack
//! probes on any other architecture like ARM or PowerPC64. A patch adding such
//! support would surely be more than welcome in LLVM!
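//!
//! As a hypothetical illustration (this is not code from this crate, and
//! `BUF_SIZE`/`big_frame` are made-up names), the kind of function whose
//! prologue the compiler instruments with a `__rust_probestack` call looks
//! roughly like this:
//!
//! ```rust,ignore
//! // A local buffer much larger than a 4096-byte page means the stack frame
//! // exceeds the guard page, so on x86/x86_64 the compiler emits a stack
//! // probe in the prologue before adjusting the stack pointer by the full
//! // frame size.
//! const BUF_SIZE: usize = 64 * 1024;
//!
//! fn big_frame() -> u8 {
//!     let buf = [0u8; BUF_SIZE]; // large local => frame larger than guard page
//!     buf[BUF_SIZE - 1]
//! }
//! ```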
#![cfg(not(feature = "mangled-names"))]
// Windows already has builtins to do this.
#![cfg(not(windows))]
// All these builtins require assembly.
#![cfg(not(feature = "no-asm"))]
// We only define stack probing for these architectures today.
#![cfg(any(target_arch = "x86_64", target_arch = "x86"))]

// We need to add .att_syntax for bootstrapping the new global_asm!
#![allow(unknown_lints, bad_asm_style)]

extern "C" {
    pub fn __rust_probestack();
}

// A wrapper for our implementation of __rust_probestack, which allows us to
// keep the assembly inline while controlling all CFI directives in the assembly
// emitted for the function.
//
// This is the ELF version.
#[cfg(not(any(target_vendor = "apple", target_os = "uefi")))]
macro_rules! define_rust_probestack {
    ($body: expr) => {
        concat!(
            "
            .att_syntax
            .pushsection .text.__rust_probestack
            .globl __rust_probestack
            .type __rust_probestack, @function
            .hidden __rust_probestack
        __rust_probestack:
            ",
            $body,
            "
            .size __rust_probestack, . - __rust_probestack
            .popsection
            "
        )
    };
}

#[cfg(all(target_os = "uefi", target_arch = "x86_64"))]
macro_rules! define_rust_probestack {
    ($body: expr) => {
        concat!(
            "
            .att_syntax
            .globl __rust_probestack
        __rust_probestack:
            ",
            $body
        )
    };
}

// Same as above, but for Mach-O. Note that the triple underscore
// is deliberate.
#[cfg(target_vendor = "apple")]
macro_rules! define_rust_probestack {
    ($body: expr) => {
        concat!(
            "
            .att_syntax
            .globl ___rust_probestack
        ___rust_probestack:
            ",
            $body
        )
    };
}

// On the x86 UEFI target the triple underscore is likewise deliberate.
#[cfg(all(target_os = "uefi", target_arch = "x86"))]
macro_rules! define_rust_probestack {
    ($body: expr) => {
        concat!(
            "
            .att_syntax
            .globl ___rust_probestack
        ___rust_probestack:
            ",
            $body
        )
    };
}

// Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
// ensuring that if any of those pages are unmapped we'll trigger a page fault.
//
// The ABI here is that the stack frame size is located in `%rax`. Upon
// return we're not supposed to modify `%rsp` or `%rax`.
//
// Any changes to this function should be replicated to the SGX version below.
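//
// As a purely illustrative sketch of the loop the assembly below implements
// (`sp`, `remaining`, `frame_size`, `touch_page`, and `PAGE_SIZE` are made-up
// names, not part of any ABI):
//
//     let mut remaining = frame_size;   // %rax, copied into %r11
//     while remaining > PAGE_SIZE {     // PAGE_SIZE == 0x1000
//         sp -= PAGE_SIZE;              // step down one page at a time
//         touch_page(sp);               // `test %rsp,8(%rsp)` faults here if
//         remaining -= PAGE_SIZE;       // this page turns out to be the guard
//     }
//     sp -= remaining;                  // probe the final, partial page
//     touch_page(sp);
//     sp += frame_size;                 // restore %rsp before returning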
#[cfg(all(
    target_arch = "x86_64",
    not(all(target_env = "sgx", target_vendor = "fortanix"))
))]
global_asm!(define_rust_probestack!(
    "
    .cfi_startproc
    pushq  %rbp
    .cfi_adjust_cfa_offset 8
    .cfi_offset %rbp, -16
    movq   %rsp, %rbp
    .cfi_def_cfa_register %rbp

    mov    %rax,%r11        // duplicate %rax as we're clobbering %r11

    // Main loop, taken in one page increments. We're decrementing rsp by
    // a page each time until there's less than a page remaining. We're
    // guaranteed that this function isn't called unless there's more than a
    // page needed.
    //
    // Note that we're also testing against `8(%rsp)` to account for the 8
    // bytes pushed on the stack originally with our return address. Using
    // `8(%rsp)` simulates us testing the stack pointer in the caller's
    // context.

    // It's usually called when %rax >= 0x1000, but that's not always true.
    // Dynamic stack allocation, which is needed to implement unsized
    // rvalues, triggers stackprobe even if %rax < 0x1000.
    // Thus we have to check %r11 first to avoid segfault.
    cmp    $0x1000,%r11
    jna    3f
2:
    sub    $0x1000,%rsp
    test   %rsp,8(%rsp)
    sub    $0x1000,%r11
    cmp    $0x1000,%r11
    ja     2b

3:
    // Finish up the last remaining stack space requested, getting the last
    // bits out of r11.
    sub    %r11,%rsp
    test   %rsp,8(%rsp)

    // Restore the stack pointer to what it previously was when entering
    // this function. The caller will readjust the stack pointer after we
    // return.
    add    %rax,%rsp

    leave
    .cfi_def_cfa_register %rsp
    .cfi_adjust_cfa_offset -8
    ret
    .cfi_endproc
    "
));

// This function is the same as above, except that some instructions are
// [manually patched for LVI].
//
// [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
#[cfg(all(
    target_arch = "x86_64",
    all(target_env = "sgx", target_vendor = "fortanix")
))]
global_asm!(define_rust_probestack!(
    "
    .cfi_startproc
    pushq  %rbp
    .cfi_adjust_cfa_offset 8
    .cfi_offset %rbp, -16
    movq   %rsp, %rbp
    .cfi_def_cfa_register %rbp

    mov    %rax,%r11        // duplicate %rax as we're clobbering %r11

    // Main loop, taken in one page increments. We're decrementing rsp by
    // a page each time until there's less than a page remaining. We're
    // guaranteed that this function isn't called unless there's more than a
    // page needed.
    //
    // Note that we're also testing against `8(%rsp)` to account for the 8
    // bytes pushed on the stack originally with our return address. Using
    // `8(%rsp)` simulates us testing the stack pointer in the caller's
    // context.

    // It's usually called when %rax >= 0x1000, but that's not always true.
    // Dynamic stack allocation, which is needed to implement unsized
    // rvalues, triggers stackprobe even if %rax < 0x1000.
    // Thus we have to check %r11 first to avoid segfault.
    cmp    $0x1000,%r11
    jna    3f
2:
    sub    $0x1000,%rsp
    test   %rsp,8(%rsp)
    sub    $0x1000,%r11
    cmp    $0x1000,%r11
    ja     2b

3:
    // Finish up the last remaining stack space requested, getting the last
    // bits out of r11.
    sub    %r11,%rsp
    test   %rsp,8(%rsp)

    // Restore the stack pointer to what it previously was when entering
    // this function. The caller will readjust the stack pointer after we
    // return.
    add    %rax,%rsp

    leave
    .cfi_def_cfa_register %rsp
    .cfi_adjust_cfa_offset -8
    pop    %r11
    lfence
    jmp    *%r11
    .cfi_endproc
    "
));

#[cfg(all(target_arch = "x86", not(target_os = "uefi")))]
// This is the same as x86_64 above, only translated for 32-bit sizes. Note
// that on Unix we're expected to restore everything as it was, so this
// function basically can't tamper with anything.
//
// The ABI here is the same as x86_64, except everything is 32-bits large.
global_asm!(define_rust_probestack!(
    "
    .cfi_startproc
    push   %ebp
    .cfi_adjust_cfa_offset 4
    .cfi_offset %ebp, -8
    mov    %esp, %ebp
    .cfi_def_cfa_register %ebp
    push   %ecx
    mov    %eax,%ecx

    cmp    $0x1000,%ecx
    jna    3f
2:
    sub    $0x1000,%esp
    test   %esp,8(%esp)
    sub    $0x1000,%ecx
    cmp    $0x1000,%ecx
    ja     2b

3:
    sub    %ecx,%esp
    test   %esp,8(%esp)

    add    %eax,%esp
    pop    %ecx
    leave
    .cfi_def_cfa_register %esp
    .cfi_adjust_cfa_offset -4
    ret
    .cfi_endproc
    "
));

#[cfg(all(target_arch = "x86", target_os = "uefi"))]
// UEFI is a Windows-like target, so LLVM lowers stack probes the way it does
// for Windows, i.e. with _chkstk semantics. On x86 that means our probestack
// function must behave like MSVC's _chkstk and subtract %eax from %esp itself.
//
// REF: Rust commit(74e80468347)
// rust\src\llvm-project\llvm\lib\Target\X86\X86FrameLowering.cpp: 805
// Comments in LLVM:
// MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
// MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
// themselves.
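//
// As an informal sketch of the caller-visible difference (the instruction
// sequences here are illustrative, not taken from compiler output): with the
// Unix-style probe above, the caller still allocates the frame itself, e.g.
//
//     call __rust_probestack
//     sub  %eax, %esp          // caller lowers %esp after the probe returns
//
// whereas with this _chkstk-style probe the `sub %eax, %esp` happens inside
// the probe, so the caller sees %esp already lowered by the frame size:
//
//     call __rust_probestack   // %esp adjusted on return, no caller-side sub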
global_asm!(define_rust_probestack!(
    "
    .cfi_startproc
    push   %ebp
    .cfi_adjust_cfa_offset 4
    .cfi_offset %ebp, -8
    mov    %esp, %ebp
    .cfi_def_cfa_register %ebp
    push   %ecx
    push   %edx
    mov    %eax,%ecx

    cmp    $0x1000,%ecx
    jna    3f
2:
    sub    $0x1000,%esp
    test   %esp,8(%esp)
    sub    $0x1000,%ecx
    cmp    $0x1000,%ecx
    ja     2b

3:
    sub    %ecx,%esp
    test   %esp,8(%esp)
    mov    4(%ebp),%edx
    mov    %edx, 12(%esp)
    add    %eax,%esp
    pop    %edx
    pop    %ecx
    leave

    sub    %eax, %esp
    .cfi_def_cfa_register %esp
    .cfi_adjust_cfa_offset -4
    ret
    .cfi_endproc
    "
));