浏览代码

Make IP checksum loop use larger chunks to ease autovectorization.

Josh Gangloff 7 年之前
父节点
当前提交
56ddb0c206
共有 1 个文件被更改,包括 20 次插入和 2 次删除
  1. 20 2
      src/wire/ip.rs

+ 20 - 2
src/wire/ip.rs

@@ -611,15 +611,33 @@ pub mod checksum {
 
     /// Compute an RFC 1071 compliant checksum (without the final complement).
     pub fn data(mut data: &[u8]) -> u16 {
-        // See RFC 1071 section 4.1 for the original implementation.
-        let mut accum: u32 = 0;
+        let mut accum = 0;
+
+        // Walk the input in fixed 32-byte chunks to ease autovectorization.
+        const CHUNK_SIZE: usize = 32;
+        while data.len() >= CHUNK_SIZE {
+            let mut d = &data[..CHUNK_SIZE];
+            // Add up the chunk's network-order 16-bit words.
+            while d.len() >= 2 {
+                accum += NetworkEndian::read_u16(d) as u32;
+                d = &d[2..];
+            }
+
+            data = &data[CHUNK_SIZE..];
+        }
+
+        // Sum the remaining tail (shorter than one full chunk),
+        // again two bytes at a time.
         while data.len() >= 2 {
             accum += NetworkEndian::read_u16(data) as u32;
             data = &data[2..];
         }
+        
+        // Add the trailing odd byte, if any, as the high byte of a 16-bit word.
         if let Some(&value) = data.first() {
             accum += (value as u32) << 8;
         }
+        
         propagate_carries(accum)
     }