View Source

Merge pull request #446 from Amanieu/asm

Amanieu d'Antras committed 3 years ago
parent commit 0f8617449f
10 files changed, 60 insertions(+), 48 deletions(-)
  1. 2 1
      build.rs
  2. 8 8
      src/arm.rs
  3. 9 8
      src/float/div.rs
  4. 2 0
      src/int/leading_zeros.rs
  5. 2 2
      src/int/specialized_div_rem/mod.rs
  6. 2 0
      src/lib.rs
  7. 5 5
      src/mem/x86_64.rs
  8. 24 18
      src/probestack.rs
  9. 3 3
      src/x86.rs
  10. 3 3
      src/x86_64.rs

+ 2 - 1
build.rs

@@ -560,7 +560,8 @@ mod c {
                         file,
                         "#include \"{}\"",
                         outlined_atomics_file.canonicalize().unwrap().display()
-                    );
+                    )
+                    .unwrap();
                     drop(file);
                     cfg.file(path);
 

+ 8 - 8
src/arm.rs

@@ -11,7 +11,7 @@ use core::intrinsics;
 #[naked]
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
 pub unsafe extern "C" fn __aeabi_uidivmod() {
-    asm!(
+    core::arch::asm!(
         "push {{lr}}",
         "sub sp, sp, #4",
         "mov r2, sp",
@@ -27,7 +27,7 @@ pub unsafe extern "C" fn __aeabi_uidivmod() {
 #[naked]
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
 pub unsafe extern "C" fn __aeabi_uidivmod() {
-    asm!(
+    core::arch::asm!(
         "push {{lr}}",
         "sub sp, sp, #4",
         "mov r2, sp",
@@ -43,7 +43,7 @@ pub unsafe extern "C" fn __aeabi_uidivmod() {
 #[naked]
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
 pub unsafe extern "C" fn __aeabi_uldivmod() {
-    asm!(
+    core::arch::asm!(
         "push {{r4, lr}}",
         "sub sp, sp, #16",
         "add r4, sp, #8",
@@ -61,7 +61,7 @@ pub unsafe extern "C" fn __aeabi_uldivmod() {
 #[naked]
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
 pub unsafe extern "C" fn __aeabi_uldivmod() {
-    asm!(
+    core::arch::asm!(
         "push {{r4, lr}}",
         "sub sp, sp, #16",
         "add r4, sp, #8",
@@ -79,7 +79,7 @@ pub unsafe extern "C" fn __aeabi_uldivmod() {
 #[naked]
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
 pub unsafe extern "C" fn __aeabi_idivmod() {
-    asm!(
+    core::arch::asm!(
         "push {{r0, r1, r4, lr}}",
         "bl __aeabi_idiv",
         "pop {{r1, r2}}",
@@ -94,7 +94,7 @@ pub unsafe extern "C" fn __aeabi_idivmod() {
 #[naked]
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
 pub unsafe extern "C" fn __aeabi_idivmod() {
-    asm!(
+    core::arch::asm!(
         "push {{r0, r1, r4, lr}}",
         "bl ___aeabi_idiv",
         "pop {{r1, r2}}",
@@ -109,7 +109,7 @@ pub unsafe extern "C" fn __aeabi_idivmod() {
 #[naked]
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
 pub unsafe extern "C" fn __aeabi_ldivmod() {
-    asm!(
+    core::arch::asm!(
         "push {{r4, lr}}",
         "sub sp, sp, #16",
         "add r4, sp, #8",
@@ -127,7 +127,7 @@ pub unsafe extern "C" fn __aeabi_ldivmod() {
 #[naked]
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
 pub unsafe extern "C" fn __aeabi_ldivmod() {
-    asm!(
+    core::arch::asm!(
         "push {{r4, lr}}",
         "sub sp, sp, #16",
         "add r4, sp, #8",

+ 9 - 8
src/float/div.rs

@@ -132,8 +132,9 @@ where
     // This doubles the number of correct binary digits in the approximation
     // with each iteration, so after three iterations, we have about 28 binary
     // digits of accuracy.
-    let mut correction: u32;
-    correction = negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32);
+
+    let mut correction: u32 =
+        negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32);
     reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) as u64 >> 31) as u32;
     correction = negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32);
     reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) as u64 >> 31) as u32;
@@ -342,8 +343,9 @@ where
     // This doubles the number of correct binary digits in the approximation
     // with each iteration, so after three iterations, we have about 28 binary
     // digits of accuracy.
-    let mut correction32: u32;
-    correction32 = negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32);
+
+    let mut correction32: u32 =
+        negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32);
     recip32 = ((recip32 as u64).wrapping_mul(correction32 as u64) >> 31) as u32;
     correction32 = negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32);
     recip32 = ((recip32 as u64).wrapping_mul(correction32 as u64) >> 31) as u32;
@@ -359,16 +361,15 @@ where
     // We need to perform one more iteration to get us to 56 binary digits;
     // The last iteration needs to happen with extra precision.
     let q63blo = CastInto::<u32>::cast(b_significand << 11.cast());
-    let correction: u64;
-    let mut reciprocal: u64;
-    correction = negate_u64(
+
+    let correction: u64 = negate_u64(
         (recip32 as u64)
             .wrapping_mul(q31b as u64)
             .wrapping_add((recip32 as u64).wrapping_mul(q63blo as u64) >> 32),
     );
     let c_hi = (correction >> 32) as u32;
     let c_lo = correction as u32;
-    reciprocal = (recip32 as u64)
+    let mut reciprocal: u64 = (recip32 as u64)
         .wrapping_mul(c_hi as u64)
         .wrapping_add((recip32 as u64).wrapping_mul(c_lo as u64) >> 32);
 

+ 2 - 0
src/int/leading_zeros.rs

@@ -5,6 +5,7 @@
 
 public_test_dep! {
 /// Returns the number of leading binary zeros in `x`.
+#[allow(dead_code)]
 pub(crate) fn usize_leading_zeros_default(x: usize) -> usize {
     // The basic idea is to test if the higher bits of `x` are zero and bisect the number
     // of leading zeros. It is possible for all branches of the bisection to use the same
@@ -78,6 +79,7 @@ pub(crate) fn usize_leading_zeros_default(x: usize) -> usize {
 
 public_test_dep! {
 /// Returns the number of leading binary zeros in `x`.
+#[allow(dead_code)]
 pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize {
     let mut x = x;
     // the number of potential leading zeros

+ 2 - 2
src/int/specialized_div_rem/mod.rs

@@ -184,7 +184,7 @@ unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) {
         // divides the combined registers rdx:rax (`duo` is split into two 64 bit parts to do this)
         // by `div`. The quotient is stored in rax and the remainder in rdx.
         // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
-        asm!(
+        core::arch::asm!(
             "div {0}",
             in(reg) div,
             inlateout("rax") duo_lo => quo,
@@ -271,7 +271,7 @@ unsafe fn u64_by_u32_div_rem(duo: u64, div: u32) -> (u32, u32) {
         // divides the combined registers rdx:rax (`duo` is split into two 32 bit parts to do this)
         // by `div`. The quotient is stored in rax and the remainder in rdx.
         // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
-        asm!(
+        core::arch::asm!(
             "div {0}",
             in(reg) div,
             inlateout("rax") duo_lo => quo,

+ 2 - 0
src/lib.rs

@@ -18,6 +18,8 @@
 #![allow(improper_ctypes, improper_ctypes_definitions)]
 // `mem::swap` cannot be used because it may generate references to memcpy in unoptimized code.
 #![allow(clippy::manual_swap)]
+// Support compiling on both stage0 and stage1 which may differ in supported stable features.
+#![allow(stable_features)]
 
 // We disable #[no_mangle] for tests so that we can verify the test results
 // against the native compiler-rt implementations of the builtins.

+ 5 - 5
src/mem/x86_64.rs

@@ -20,7 +20,7 @@
 #[cfg(target_feature = "ermsb")]
 pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) {
     // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
-    asm!(
+    core::arch::asm!(
         "repe movsb (%rsi), (%rdi)",
         inout("rcx") count => _,
         inout("rdi") dest => _,
@@ -35,7 +35,7 @@ pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) {
     let qword_count = count >> 3;
     let byte_count = count & 0b111;
     // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
-    asm!(
+    core::arch::asm!(
         "repe movsq (%rsi), (%rdi)",
         "mov {byte_count:e}, %ecx",
         "repe movsb (%rsi), (%rdi)",
@@ -52,7 +52,7 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) {
     let qword_count = count >> 3;
     let byte_count = count & 0b111;
     // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
-    asm!(
+    core::arch::asm!(
         "std",
         "repe movsq (%rsi), (%rdi)",
         "movl {byte_count:e}, %ecx",
@@ -72,7 +72,7 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) {
 #[cfg(target_feature = "ermsb")]
 pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) {
     // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
-    asm!(
+    core::arch::asm!(
         "repe stosb %al, (%rdi)",
         inout("rcx") count => _,
         inout("rdi") dest => _,
@@ -87,7 +87,7 @@ pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) {
     let qword_count = count >> 3;
     let byte_count = count & 0b111;
     // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
-    asm!(
+    core::arch::asm!(
         "repe stosq %rax, (%rdi)",
         "mov {byte_count:e}, %ecx",
         "repe stosb %al, (%rdi)",

+ 24 - 18
src/probestack.rs

@@ -48,8 +48,6 @@
 #![cfg(not(feature = "no-asm"))]
 // We only define stack probing for these architectures today.
 #![cfg(any(target_arch = "x86_64", target_arch = "x86"))]
-// We need to add .att_syntax for bootstraping the new global_asm!
-#![allow(unknown_lints, bad_asm_style)]
 
 extern "C" {
     pub fn __rust_probestack();
@@ -65,7 +63,6 @@ macro_rules! define_rust_probestack {
     ($body: expr) => {
         concat!(
             "
-            .att_syntax
             .pushsection .text.__rust_probestack
             .globl __rust_probestack
             .type  __rust_probestack, @function
@@ -86,7 +83,6 @@ macro_rules! define_rust_probestack {
     ($body: expr) => {
         concat!(
             "
-            .att_syntax
             .globl __rust_probestack
         __rust_probestack:
             ",
@@ -102,7 +98,6 @@ macro_rules! define_rust_probestack {
     ($body: expr) => {
         concat!(
             "
-            .att_syntax
             .globl ___rust_probestack
         ___rust_probestack:
             ",
@@ -117,7 +112,6 @@ macro_rules! define_rust_probestack {
     ($body: expr) => {
         concat!(
             "
-            .att_syntax
             .globl ___rust_probestack
         ___rust_probestack:
             ",
@@ -137,8 +131,9 @@ macro_rules! define_rust_probestack {
     target_arch = "x86_64",
     not(all(target_env = "sgx", target_vendor = "fortanix"))
 ))]
-global_asm!(define_rust_probestack!(
-    "
+core::arch::global_asm!(
+    define_rust_probestack!(
+        "
     .cfi_startproc
     pushq  %rbp
     .cfi_adjust_cfa_offset 8
@@ -188,7 +183,9 @@ global_asm!(define_rust_probestack!(
     ret
     .cfi_endproc
     "
-));
+    ),
+    options(att_syntax)
+);
 
 // This function is the same as above, except that some instructions are
 // [manually patched for LVI].
@@ -198,8 +195,9 @@ global_asm!(define_rust_probestack!(
     target_arch = "x86_64",
     all(target_env = "sgx", target_vendor = "fortanix")
 ))]
-global_asm!(define_rust_probestack!(
-    "
+core::arch::global_asm!(
+    define_rust_probestack!(
+        "
     .cfi_startproc
     pushq  %rbp
     .cfi_adjust_cfa_offset 8
@@ -251,7 +249,9 @@ global_asm!(define_rust_probestack!(
     jmp *%r11
     .cfi_endproc
     "
-));
+    ),
+    options(att_syntax)
+);
 
 #[cfg(all(target_arch = "x86", not(target_os = "uefi")))]
 // This is the same as x86_64 above, only translated for 32-bit sizes. Note
@@ -259,8 +259,9 @@ global_asm!(define_rust_probestack!(
 // function basically can't tamper with anything.
 //
 // The ABI here is the same as x86_64, except everything is 32-bits large.
-global_asm!(define_rust_probestack!(
-    "
+core::arch::global_asm!(
+    define_rust_probestack!(
+        "
     .cfi_startproc
     push   %ebp
     .cfi_adjust_cfa_offset 4
@@ -291,7 +292,9 @@ global_asm!(define_rust_probestack!(
     ret
     .cfi_endproc
     "
-));
+    ),
+    options(att_syntax)
+);
 
 #[cfg(all(target_arch = "x86", target_os = "uefi"))]
 // UEFI target is windows like target. LLVM will do _chkstk things like windows.
@@ -304,8 +307,9 @@ global_asm!(define_rust_probestack!(
 //   MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
 //   MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
 //   themselves.
-global_asm!(define_rust_probestack!(
-    "
+core::arch::global_asm!(
+    define_rust_probestack!(
+        "
     .cfi_startproc
     push   %ebp
     .cfi_adjust_cfa_offset 4
@@ -341,4 +345,6 @@ global_asm!(define_rust_probestack!(
     ret
     .cfi_endproc
     "
-));
+    ),
+    options(att_syntax)
+);

+ 3 - 3
src/x86.rs

@@ -17,7 +17,7 @@ use core::intrinsics;
 #[naked]
 #[no_mangle]
 pub unsafe extern "C" fn ___chkstk_ms() {
-    asm!(
+    core::arch::asm!(
         "push   %ecx",
         "push   %eax",
         "cmp    $0x1000,%eax",
@@ -49,7 +49,7 @@ pub unsafe extern "C" fn ___chkstk_ms() {
 #[naked]
 #[no_mangle]
 pub unsafe extern "C" fn __alloca() {
-    asm!(
+    core::arch::asm!(
         "jmp ___chkstk", // Jump to ___chkstk since fallthrough may be unreliable"
         options(noreturn, att_syntax)
     );
@@ -64,7 +64,7 @@ pub unsafe extern "C" fn __alloca() {
 #[naked]
 #[no_mangle]
 pub unsafe extern "C" fn ___chkstk() {
-    asm!(
+    core::arch::asm!(
         "push   %ecx",
         "cmp    $0x1000,%eax",
         "lea    8(%esp),%ecx", // esp before calling this routine -> ecx

+ 3 - 3
src/x86_64.rs

@@ -17,7 +17,7 @@ use core::intrinsics;
 #[naked]
 #[no_mangle]
 pub unsafe extern "C" fn ___chkstk_ms() {
-    asm!(
+    core::arch::asm!(
         "push   %rcx",
         "push   %rax",
         "cmp    $0x1000,%rax",
@@ -48,7 +48,7 @@ pub unsafe extern "C" fn ___chkstk_ms() {
 #[naked]
 #[no_mangle]
 pub unsafe extern "C" fn __alloca() {
-    asm!(
+    core::arch::asm!(
         "mov    %rcx,%rax", // x64 _alloca is a normal function with parameter in rcx
         "jmp    ___chkstk", // Jump to ___chkstk since fallthrough may be unreliable"
         options(noreturn, att_syntax)
@@ -64,7 +64,7 @@ pub unsafe extern "C" fn __alloca() {
 #[naked]
 #[no_mangle]
 pub unsafe extern "C" fn ___chkstk() {
-    asm!(
+    core::arch::asm!(
         "push   %rcx",
         "cmp    $0x1000,%rax",
         "lea    16(%rsp),%rcx", // rsp before calling this routine -> rcx