Browse Source

use AAPCS calling convention on all aeabi intrinsics

also, on ARM, inline(always) the actual implementation of the intrinsics so we
end up with code like this:

```
00000000 <__aeabi_dadd>:
    (implementation here)
```

instead of "trampolines" like this:

```
00000000 <__aeabi_dadd>:
    (shuffle registers)
    (call __adddf3)

00000000 <__adddf3>:
    (implementation here)
```

closes #116
Jorge Aparicio 8 years ago
parent
commit
dfa7b161aa
7 changed files with 36 additions and 24 deletions
  1. 20 20
      src/arm.rs
  2. 2 1
      src/float/add.rs
  3. 2 1
      src/int/mul.rs
  4. 2 1
      src/int/sdiv.rs
  5. 4 0
      src/int/shift.rs
  6. 2 1
      src/int/udiv.rs
  7. 4 0
      src/lib.rs

+ 20 - 20
src/arm.rs

@@ -62,44 +62,44 @@ pub unsafe fn __aeabi_ldivmod() {
 
 // TODO: These aeabi_* functions should be defined as aliases
 #[cfg_attr(not(test), no_mangle)]
-pub extern "C" fn __aeabi_dadd(a: f64, b: f64) -> f64 {
+pub extern "aapcs" fn __aeabi_dadd(a: f64, b: f64) -> f64 {
     ::float::add::__adddf3(a, b)
 }
 
 #[cfg_attr(not(test), no_mangle)]
-pub extern "C" fn __aeabi_fadd(a: f32, b: f32) -> f32 {
+pub extern "aapcs" fn __aeabi_fadd(a: f32, b: f32) -> f32 {
     ::float::add::__addsf3(a, b)
 }
 
 #[cfg(not(all(feature = "c", target_arch = "arm", not(target_os = "ios"), not(thumbv6m))))]
 #[cfg_attr(not(test), no_mangle)]
-pub extern "C" fn __aeabi_idiv(a: i32, b: i32) -> i32 {
+pub extern "aapcs" fn __aeabi_idiv(a: i32, b: i32) -> i32 {
     ::int::sdiv::__divsi3(a, b)
 }
 
 #[cfg_attr(not(test), no_mangle)]
-pub extern "C" fn __aeabi_lasr(a: i64, b: u32) -> i64 {
+pub extern "aapcs" fn __aeabi_lasr(a: i64, b: u32) -> i64 {
     ::int::shift::__ashrdi3(a, b)
 }
 
 #[cfg_attr(not(test), no_mangle)]
-pub extern "C" fn __aeabi_llsl(a: u64, b: u32) -> u64 {
+pub extern "aapcs" fn __aeabi_llsl(a: u64, b: u32) -> u64 {
     ::int::shift::__ashldi3(a, b)
 }
 
 #[cfg_attr(not(test), no_mangle)]
-pub extern "C" fn __aeabi_llsr(a: u64, b: u32) -> u64 {
+pub extern "aapcs" fn __aeabi_llsr(a: u64, b: u32) -> u64 {
     ::int::shift::__lshrdi3(a, b)
 }
 
 #[cfg_attr(not(test), no_mangle)]
-pub extern "C" fn __aeabi_lmul(a: u64, b: u64) -> u64 {
+pub extern "aapcs" fn __aeabi_lmul(a: u64, b: u64) -> u64 {
     ::int::mul::__muldi3(a, b)
 }
 
 #[cfg(not(all(feature = "c", target_arch = "arm", not(target_os = "ios"), not(thumbv6m))))]
 #[cfg_attr(not(test), no_mangle)]
-pub extern "C" fn __aeabi_uidiv(a: u32, b: u32) -> u32 {
+pub extern "aapcs" fn __aeabi_uidiv(a: u32, b: u32) -> u32 {
     ::int::udiv::__udivsi3(a, b)
 }
 
@@ -113,55 +113,55 @@ extern "C" {
 // FIXME: The `*4` and `*8` variants should be defined as aliases.
 
 #[cfg_attr(not(test), no_mangle)]
-pub unsafe extern "C" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) {
+pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) {
     memcpy(dest, src, n);
 }
 #[cfg_attr(not(test), no_mangle)]
-pub unsafe extern "C" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) {
+pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) {
     memcpy(dest, src, n);
 }
 #[cfg_attr(not(test), no_mangle)]
-pub unsafe extern "C" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) {
+pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) {
     memcpy(dest, src, n);
 }
 
 #[cfg_attr(not(test), no_mangle)]
-pub unsafe extern "C" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) {
+pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) {
     memmove(dest, src, n);
 }
 #[cfg_attr(not(test), no_mangle)]
-pub unsafe extern "C" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) {
+pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) {
     memmove(dest, src, n);
 }
 #[cfg_attr(not(test), no_mangle)]
-pub unsafe extern "C" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) {
+pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) {
     memmove(dest, src, n);
 }
 
 // Note the different argument order
 #[cfg_attr(not(test), no_mangle)]
-pub unsafe extern "C" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) {
+pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) {
     memset(dest, c, n);
 }
 #[cfg_attr(not(test), no_mangle)]
-pub unsafe extern "C" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) {
+pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) {
     memset(dest, c, n);
 }
 #[cfg_attr(not(test), no_mangle)]
-pub unsafe extern "C" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) {
+pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) {
     memset(dest, c, n);
 }
 
 #[cfg_attr(not(test), no_mangle)]
-pub unsafe extern "C" fn __aeabi_memclr(dest: *mut u8, n: usize) {
+pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) {
     memset(dest, 0, n);
 }
 #[cfg_attr(not(test), no_mangle)]
-pub unsafe extern "C" fn __aeabi_memclr4(dest: *mut u8, n: usize) {
+pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) {
     memset(dest, 0, n);
 }
 #[cfg_attr(not(test), no_mangle)]
-pub unsafe extern "C" fn __aeabi_memclr8(dest: *mut u8, n: usize) {
+pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) {
     memset(dest, 0, n);
 }
 

+ 2 - 1
src/float/add.rs

@@ -7,7 +7,8 @@ macro_rules! add {
     ($intrinsic:ident: $ty:ty) => {
         /// Returns `a + b`
         #[allow(unused_parens)]
-        #[cfg_attr(not(test), no_mangle)]
+        #[cfg_attr(all(not(test), not(target_arch = "arm")), no_mangle)]
+        #[cfg_attr(all(not(test), target_arch = "arm"), inline(always))]
         pub extern fn $intrinsic(a: $ty, b: $ty) -> $ty {
             let one = Wrapping(1 as <$ty as Float>::Int);
             let zero = Wrapping(0 as <$ty as Float>::Int);

+ 2 - 1
src/int/mul.rs

@@ -4,7 +4,8 @@ use int::Int;
 macro_rules! mul {
     ($intrinsic:ident: $ty:ty) => {
         /// Returns `a * b`
-        #[cfg_attr(not(test), no_mangle)]
+        #[cfg_attr(all(not(test), not(target_arch = "arm")), no_mangle)]
+        #[cfg_attr(all(not(test), target_arch = "arm"), inline(always))]
         pub extern "C" fn $intrinsic(a: $ty, b: $ty) -> $ty {
             let half_bits = <$ty>::bits() / 4;
             let lower_mask = !0 >> half_bits;

+ 2 - 1
src/int/sdiv.rs

@@ -42,7 +42,8 @@ macro_rules! mod_ {
 macro_rules! divmod {
     ($intrinsic:ident, $div:ident: $ty:ty) => {
         /// Returns `a / b` and sets `*rem = n % d`
-        #[cfg_attr(not(test), no_mangle)]
+        #[cfg_attr(all(not(test), not(target_arch = "arm")), no_mangle)]
+        #[cfg_attr(all(not(test), target_arch = "arm"), inline(always))]
         pub extern "C" fn $intrinsic(a: $ty, b: $ty, rem: &mut $ty) -> $ty {
             #[cfg(all(feature = "c", any(target_arch = "x86")))]
             extern {

+ 4 - 0
src/int/shift.rs

@@ -4,6 +4,8 @@ macro_rules! ashl {
     ($intrinsic:ident: $ty:ty) => {
         /// Returns `a << b`, requires `b < $ty::bits()`
         #[cfg_attr(not(test), no_mangle)]
+        #[cfg_attr(all(not(test), not(target_arch = "arm")), no_mangle)]
+        #[cfg_attr(all(not(test), target_arch = "arm"), inline(always))]
         pub extern "C" fn $intrinsic(a: $ty, b: u32) -> $ty {
             let half_bits = <$ty>::bits() / 2;
             if b & half_bits != 0 {
@@ -21,6 +23,8 @@ macro_rules! ashr {
     ($intrinsic:ident: $ty:ty) => {
         /// Returns arithmetic `a >> b`, requires `b < $ty::bits()`
         #[cfg_attr(not(test), no_mangle)]
+        #[cfg_attr(all(not(test), not(target_arch = "arm")), no_mangle)]
+        #[cfg_attr(all(not(test), target_arch = "arm"), inline(always))]
         pub extern "C" fn $intrinsic(a: $ty, b: u32) -> $ty {
             let half_bits = <$ty>::bits() / 2;
             if b & half_bits != 0 {

+ 2 - 1
src/int/udiv.rs

@@ -3,7 +3,8 @@ use int::{Int, LargeInt};
 
 /// Returns `n / d`
 #[cfg(not(all(feature = "c", target_arch = "arm", not(target_os = "ios"), not(thumbv6m))))]
-#[cfg_attr(not(test), no_mangle)]
+#[cfg_attr(all(not(test), not(target_arch = "arm")), no_mangle)]
+#[cfg_attr(all(not(test), target_arch = "arm"), inline(always))]
 pub extern "C" fn __udivsi3(n: u32, d: u32) -> u32 {
     // Special cases
     if d == 0 {

+ 4 - 0
src/lib.rs

@@ -28,6 +28,10 @@
 // NOTE cfg(all(feature = "c", ..)) indicate that compiler-rt provides an arch optimized
 // implementation of that intrinsic and we'll prefer to use that
 
+// NOTE(aapcs, aeabi, arm) ARM targets use intrinsics named __aeabi_* instead of the intrinsics
+// that follow the "x86 naming convention" (e.g. __addsf3). Those aeabi intrinsics must adhere to
+// the AAPCS calling convention (`extern "aapcs"`) because that's how LLVM will call them.
+
 // TODO(rust-lang/rust#37029) use e.g. checked_div(_).unwrap_or_else(|| abort())
 macro_rules! udiv {
     ($a:expr, $b:expr) => {