Browse Source

optimize 32-bit aligned mem{cpy,clr,set} intrinsics for ARM

this reduces the execution time of all these routines by 40-70%
Jorge Aparicio 7 years ago
parent
commit
75c6ccca71
6 changed files with 463 additions and 30 deletions
  1. 57 25
      src/arm.rs
  2. 1 1
      src/lib.rs
  3. 4 4
      src/mem.rs
  4. 58 0
      tests/aeabi_memclr.rs
  5. 69 0
      tests/aeabi_memcpy.rs
  6. 274 0
      tests/aeabi_memset.rs

+ 57 - 25
src/arm.rs

@@ -1,7 +1,6 @@
-use core::intrinsics;
+use core::{intrinsics, ptr};
 
-#[cfg(feature = "mem")]
-use mem::{memcpy, memmove, memset};
+use mem;
 
 // NOTE This function and the ones below are implemented using assembly because they using a custom
 // calling convention which can't be implemented using a normal Rust function
@@ -60,65 +59,98 @@ pub unsafe fn __aeabi_ldivmod() {
     intrinsics::unreachable();
 }
 
-// TODO: These aeabi_* functions should be defined as aliases
-#[cfg(not(feature = "mem"))]
-extern "C" {
-    fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8;
-    fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8;
-    fn memset(dest: *mut u8, c: i32, n: usize) -> *mut u8;
-}
-
 // FIXME: The `*4` and `*8` variants should be defined as aliases.
 
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
+#[linkage = "weak"]
 pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) {
-    memcpy(dest, src, n);
+    mem::memcpy(dest, src, n);
 }
+
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
-pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) {
-    memcpy(dest, src, n);
+#[linkage = "weak"]
+pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, mut n: usize) {
+    let mut dest = dest as *mut u32;
+    let mut src = src as *mut u32;
+
+    while n >= 4 {
+        ptr::write(dest, ptr::read(src));
+        dest = dest.offset(1);
+        src = src.offset(1);
+        n -= 4;
+    }
+
+    __aeabi_memcpy(dest as *mut u8, src as *const u8, n);
 }
+
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
+#[linkage = "weak"]
 pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) {
-    memcpy(dest, src, n);
+    __aeabi_memcpy4(dest, src, n);
 }
 
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
+#[linkage = "weak"]
 pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) {
-    memmove(dest, src, n);
+    mem::memmove(dest, src, n);
 }
+
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
+#[linkage = "weak"]
 pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) {
-    memmove(dest, src, n);
+    __aeabi_memmove(dest, src, n);
 }
+
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
+#[linkage = "weak"]
 pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) {
-    memmove(dest, src, n);
+    __aeabi_memmove(dest, src, n);
 }
 
 // Note the different argument order
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
+#[linkage = "weak"]
 pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) {
-    memset(dest, c, n);
+    mem::memset(dest, c, n);
 }
+
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
-pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) {
-    memset(dest, c, n);
+#[linkage = "weak"]
+pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, mut n: usize, c: i32) {
+    let mut dest = dest as *mut u32;
+
+    let byte = (c as u32) & 0xff;
+    let c = (byte << 24) | (byte << 16) | (byte << 8) | byte;
+
+    while n >= 4 {
+        ptr::write(dest, c);
+        dest = dest.offset(1);
+        n -= 4;
+    }
+
+    __aeabi_memset(dest as *mut u8, n, byte as i32);
 }
+
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
+#[linkage = "weak"]
 pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) {
-    memset(dest, c, n);
+    __aeabi_memset4(dest, n, c);
 }
 
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
+#[linkage = "weak"]
 pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) {
-    memset(dest, 0, n);
+    __aeabi_memset(dest, n, 0);
 }
+
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
+#[linkage = "weak"]
 pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) {
-    memset(dest, 0, n);
+    __aeabi_memset4(dest, n, 0);
 }
+
 #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
+#[linkage = "weak"]
 pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) {
-    memset(dest, 0, n);
+    __aeabi_memset4(dest, n, 0);
 }

+ 1 - 1
src/lib.rs

@@ -16,6 +16,7 @@
 #![feature(i128_type)]
 #![feature(repr_simd)]
 #![feature(abi_unadjusted)]
+#![feature(linkage)]
 #![allow(unused_features)]
 #![no_builtins]
 #![unstable(feature = "compiler_builtins_lib",
@@ -45,7 +46,6 @@ mod macros;
 pub mod int;
 pub mod float;
 
-#[cfg(feature = "mem")]
 pub mod mem;
 
 #[cfg(target_arch = "arm")]

+ 4 - 4
src/mem.rs

@@ -5,7 +5,7 @@ type c_int = i16;
 #[cfg(not(target_pointer_width = "16"))]
 type c_int = i32;
 
-#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
+#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
 pub unsafe extern "C" fn memcpy(dest: *mut u8,
                                 src: *const u8,
                                 n: usize)
@@ -18,7 +18,7 @@ pub unsafe extern "C" fn memcpy(dest: *mut u8,
     dest
 }
 
-#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
+#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
 pub unsafe extern "C" fn memmove(dest: *mut u8,
                                  src: *const u8,
                                  n: usize)
@@ -41,7 +41,7 @@ pub unsafe extern "C" fn memmove(dest: *mut u8,
     dest
 }
 
-#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
+#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
 pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 {
     let mut i = 0;
     while i < n {
@@ -51,7 +51,7 @@ pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 {
     s
 }
 
-#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
+#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
 pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
     let mut i = 0;
     while i < n {

+ 58 - 0
tests/aeabi_memclr.rs

@@ -0,0 +1,58 @@
+#![cfg(all(target_arch = "arm",
+           not(any(target_env = "gnu", target_env = "musl")),
+           target_os = "linux",
+           feature = "mem"))]
+#![feature(compiler_builtins_lib)]
+#![no_std]
+
+extern crate compiler_builtins;
+
+// test runner
+extern crate utest_cortex_m_qemu;
+
+// overrides `panic!`
+#[macro_use]
+extern crate utest_macros;
+
+use core::mem;
+
+macro_rules! panic {
+    ($($tt:tt)*) => {
+        upanic!($($tt)*);
+    };
+}
+
+extern "C" {
+    fn __aeabi_memclr4(dest: *mut u8, n: usize);
+    fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32);
+}
+
+struct Aligned {
+    array: [u8; 8],
+    _alignment: [u32; 0],
+}
+
+impl Aligned {
+    fn new() -> Self {
+        Aligned {
+            array: [0; 8],
+            _alignment: [],
+        }
+    }
+}
+
+#[test]
+fn memclr4() {
+    let mut aligned = Aligned::new();;
+    assert_eq!(mem::align_of_val(&aligned), 4);
+    let xs = &mut aligned.array;
+
+    for n in 0..9 {
+        unsafe {
+            __aeabi_memset4(xs.as_mut_ptr(), n, 0xff);
+            __aeabi_memclr4(xs.as_mut_ptr(), n);
+        }
+
+        assert!(xs[0..n].iter().all(|x| *x == 0));
+    }
+}

+ 69 - 0
tests/aeabi_memcpy.rs

@@ -0,0 +1,69 @@
+#![cfg(all(target_arch = "arm",
+           not(any(target_env = "gnu", target_env = "musl")),
+           target_os = "linux",
+           feature = "mem"))]
+#![feature(compiler_builtins_lib)]
+#![no_std]
+
+extern crate compiler_builtins;
+
+// test runner
+extern crate utest_cortex_m_qemu;
+
+// overrides `panic!`
+#[macro_use]
+extern crate utest_macros;
+
+macro_rules! panic {
+    ($($tt:tt)*) => {
+        upanic!($($tt)*);
+    };
+}
+
+extern "C" {
+    fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize);
+    fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize);
+}
+
+struct Aligned {
+    array: [u8; 8],
+    _alignment: [u32; 0],
+}
+
+impl Aligned {
+    fn new(array: [u8; 8]) -> Self {
+        Aligned {
+            array: array,
+            _alignment: [],
+        }
+    }
+}
+
+#[test]
+fn memcpy() {
+    let mut dest = [0; 4];
+    let src = [0xde, 0xad, 0xbe, 0xef];
+
+    for n in 0..dest.len() {
+        dest.copy_from_slice(&[0; 4]);
+
+        unsafe { __aeabi_memcpy(dest.as_mut_ptr(), src.as_ptr(), n) }
+
+        assert_eq!(&dest[0..n], &src[0..n])
+    }
+}
+
+#[test]
+fn memcpy4() {
+    let mut aligned = Aligned::new([0; 8]);
+    let dest = &mut aligned.array;
+    let src = [0xde, 0xad, 0xbe, 0xef, 0xba, 0xad, 0xf0, 0x0d];
+
+    for n in 0..dest.len() {
+        dest.copy_from_slice(&[0; 8]);
+
+        unsafe { __aeabi_memcpy4(dest.as_mut_ptr(), src.as_ptr(), n) }
+
+        assert_eq!(&dest[0..n], &src[0..n])
+    }
+}

+ 274 - 0
tests/aeabi_memset.rs

@@ -0,0 +1,274 @@
+#![cfg(all(target_arch = "arm",
+           not(any(target_env = "gnu", target_env = "musl")),
+           target_os = "linux",
+           feature = "mem"))]
+#![feature(compiler_builtins_lib)]
+#![no_std]
+
+extern crate compiler_builtins;
+
+// test runner
+extern crate utest_cortex_m_qemu;
+
+// overrides `panic!`
+#[macro_use]
+extern crate utest_macros;
+
+use core::mem;
+
+macro_rules! panic {
+    ($($tt:tt)*) => {
+        upanic!($($tt)*);
+    };
+}
+
+extern "C" {
+    fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32);
+}
+
+struct Aligned {
+    array: [u8; 8],
+    _alignment: [u32; 0],
+}
+
+impl Aligned {
+    fn new(array: [u8; 8]) -> Self {
+        Aligned {
+            array: array,
+            _alignment: [],
+        }
+    }
+}
+
+#[test]
+fn zero() {
+    let mut aligned = Aligned::new([0u8; 8]);;
+    assert_eq!(mem::align_of_val(&aligned), 4);
+    let xs = &mut aligned.array;
+    let c = 0xdeadbeef;
+
+    unsafe {
+        __aeabi_memset4(xs.as_mut_ptr(), 0, c)
+    }
+
+    assert_eq!(*xs, [0; 8]);
+
+    let mut aligned = Aligned::new([1u8; 8]);;
+    assert_eq!(mem::align_of_val(&aligned), 4);
+    let xs = &mut aligned.array;
+    let c = 0xdeadbeef;
+
+    unsafe {
+        __aeabi_memset4(xs.as_mut_ptr(), 0, c)
+    }
+
+    assert_eq!(*xs, [1; 8]);
+}
+
+#[test]
+fn one() {
+    let mut aligned = Aligned::new([0u8; 8]);;
+    assert_eq!(mem::align_of_val(&aligned), 4);
+    let xs = &mut aligned.array;
+    let n = 1;
+    let c = 0xdeadbeef;
+
+    unsafe {
+        __aeabi_memset4(xs.as_mut_ptr(), n, c)
+    }
+
+    assert_eq!(*xs, [0xef, 0, 0, 0, 0, 0, 0, 0]);
+
+    let mut aligned = Aligned::new([1u8; 8]);;
+    assert_eq!(mem::align_of_val(&aligned), 4);
+    let xs = &mut aligned.array;
+    let c = 0xdeadbeef;
+
+    unsafe {
+        __aeabi_memset4(xs.as_mut_ptr(), n, c)
+    }
+
+    assert_eq!(*xs, [0xef, 1, 1, 1, 1, 1, 1, 1]);
+}
+
+#[test]
+fn two() {
+    let mut aligned = Aligned::new([0u8; 8]);;
+    assert_eq!(mem::align_of_val(&aligned), 4);
+    let xs = &mut aligned.array;
+    let n = 2;
+    let c = 0xdeadbeef;
+
+    unsafe {
+        __aeabi_memset4(xs.as_mut_ptr(), n, c)
+    }
+
+    assert_eq!(*xs, [0xef, 0xef, 0, 0, 0, 0, 0, 0]);
+
+    let mut aligned = Aligned::new([1u8; 8]);;
+    assert_eq!(mem::align_of_val(&aligned), 4);
+    let xs = &mut aligned.array;
+    let c = 0xdeadbeef;
+
+    unsafe {
+        __aeabi_memset4(xs.as_mut_ptr(), n, c)
+    }
+
+    assert_eq!(*xs, [0xef, 0xef, 1, 1, 1, 1, 1, 1]);
+}
+
+#[test]
+fn three() {
+    let mut aligned = Aligned::new([0u8; 8]);;
+    assert_eq!(mem::align_of_val(&aligned), 4);
+    let xs = &mut aligned.array;
+    let n = 3;
+    let c = 0xdeadbeef;
+
+    unsafe {
+        __aeabi_memset4(xs.as_mut_ptr(), n, c)
+    }
+
+    assert_eq!(*xs, [0xef, 0xef, 0xef, 0, 0, 0, 0, 0]);
+
+    let mut aligned = Aligned::new([1u8; 8]);;
+    assert_eq!(mem::align_of_val(&aligned), 4);
+    let xs = &mut aligned.array;
+    let c = 0xdeadbeef;
+
+    unsafe {
+        __aeabi_memset4(xs.as_mut_ptr(), n, c)
+    }
+
+    assert_eq!(*xs, [0xef, 0xef, 0xef, 1, 1, 1, 1, 1]);
+}
+
+#[test]
+fn four() {
+    let mut aligned = Aligned::new([0u8; 8]);;
+    assert_eq!(mem::align_of_val(&aligned), 4);
+    let xs = &mut aligned.array;
+    let n = 4;
+    let c = 0xdeadbeef;
+
+    unsafe {
+        __aeabi_memset4(xs.as_mut_ptr(), n, c)
+    }
+
+    assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0, 0, 0, 0]);
+
+    let mut aligned = Aligned::new([1u8; 8]);;
+    assert_eq!(mem::align_of_val(&aligned), 4);
+    let xs = &mut aligned.array;
+    let c = 0xdeadbeef;
+
+    unsafe {
+        __aeabi_memset4(xs.as_mut_ptr(), n, c)
+    }
+
+    assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 1, 1, 1, 1]);
+}
+
+#[test]
+fn five() {
+    let mut aligned = Aligned::new([0u8; 8]);;
+    assert_eq!(mem::align_of_val(&aligned), 4);
+    let xs = &mut aligned.array;
+    let n = 5;
+    let c = 0xdeadbeef;
+
+    unsafe {
+        __aeabi_memset4(xs.as_mut_ptr(), n, c)
+    }
+
+    assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0, 0, 0]);
+
+    let mut aligned = Aligned::new([1u8; 8]);;
+    assert_eq!(mem::align_of_val(&aligned), 4);
+    let xs = &mut aligned.array;
+    let c = 0xdeadbeef;
+
+    unsafe {
+        __aeabi_memset4(xs.as_mut_ptr(), n, c)
+    }
+
+    assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 1, 1, 1]);
+}
+
+#[test]
+fn six() {
+    let mut aligned = Aligned::new([0u8; 8]);;
+    assert_eq!(mem::align_of_val(&aligned), 4);
+    let xs = &mut aligned.array;
+    let n = 6;
+    let c = 0xdeadbeef;
+
+    unsafe {
+        __aeabi_memset4(xs.as_mut_ptr(), n, c)
+    }
+
+    assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0, 0]);
+
+    let mut aligned = Aligned::new([1u8; 8]);;
+    assert_eq!(mem::align_of_val(&aligned), 4);
+    let xs = &mut aligned.array;
+    let c = 0xdeadbeef;
+
+    unsafe {
+        __aeabi_memset4(xs.as_mut_ptr(), n, c)
+    }
+
+    assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 1, 1]);
+}
+
+#[test]
+fn seven() {
+    let mut aligned = Aligned::new([0u8; 8]);;
+    assert_eq!(mem::align_of_val(&aligned), 4);
+    let xs = &mut aligned.array;
+    let n = 7;
+    let c = 0xdeadbeef;
+
+    unsafe {
+        __aeabi_memset4(xs.as_mut_ptr(), n, c)
+    }
+
+    assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0]);
+
+    let mut aligned = Aligned::new([1u8; 8]);;
+    assert_eq!(mem::align_of_val(&aligned), 4);
+    let xs = &mut aligned.array;
+    let c = 0xdeadbeef;
+
+    unsafe {
+        __aeabi_memset4(xs.as_mut_ptr(), n, c)
+    }
+
+    assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 1]);
+}
+
+#[test]
+fn eight() {
+    let mut aligned = Aligned::new([0u8; 8]);;
+    assert_eq!(mem::align_of_val(&aligned), 4);
+    let xs = &mut aligned.array;
+    let n = 8;
+    let c = 0xdeadbeef;
+
+    unsafe {
+        __aeabi_memset4(xs.as_mut_ptr(), n, c)
+    }
+
+    assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef]);
+
+    let mut aligned = Aligned::new([1u8; 8]);;
+    assert_eq!(mem::align_of_val(&aligned), 4);
+    let xs = &mut aligned.array;
+    let c = 0xdeadbeef;
+
+    unsafe {
+        __aeabi_memset4(xs.as_mut_ptr(), n, c)
+    }
+
+    assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef]);
+}