浏览代码

Skip null nodes when updating fat values.

Previously, we only short-circuited once the fat value no longer
changed. Now we also stop updating values when a null node is reached.

- Rename `VecElems` to `Elements`
- Add additional checks for the lookback on `Seek`
- Remove `prelude`.
- Add guarantee that shortcuts never start at the current node.
- Format mathematical expressions with LaTeX when enabled.
ticki 8 年之前
父节点
当前提交
2ce8cd156e
共有 5 个文件被更改,包括 96 次插入和 79 次删除
  1. 19 8
      src/bk/node.rs
  2. 50 40
      src/bk/seek.rs
  3. 20 13
      src/bk/shortcut.rs
  4. 0 11
      src/prelude.rs
  5. 7 7
      src/vec.rs

+ 19 - 8
src/bk/node.rs

@@ -1,6 +1,11 @@
 /// A block list node.
 ///
-/// 
+/// A node consists of three components:
+///
+/// 1. The inner value that the node holds.
+/// 2. A pointer to the next node.
+/// 3. A stack of so-called "shortcuts", which contain data about jumping over/searching for
+///    nodes.
 struct Node {
     /// The inner block.
     ///
@@ -18,13 +23,11 @@ struct Node {
     /// This is a stack of linked list nodes, such that any entry has a list which is a superset of
     /// the latter. The lowest layer is a subset of the block list itself.
     ///
-    /// ```
-    /// ...
-    /// 2      # ---------------------> [6] ---------------------> [9] -------------> NIL
-    /// 1      # ---------------------> [6] ---> [7] ------------> [9] -------------> NIL
-    /// 0      # ------------> [5] ---> [6] ---> [7] ------------> [9] ---> [10] ---> NIL
-    /// bottom # ---> [1] ---> [5] ---> [6] ---> [7] ---> [8] ---> [9] ---> [10] ---> NIL
-    /// ```
+    ///     ...
+    ///     2      # ---------------------> [6] ---------------------> [9] -------------> NIL
+    ///     1      # ---------------------> [6] ---> [7] ------------> [9] -------------> NIL
+    ///     0      # ------------> [5] ---> [6] ---> [7] ------------> [9] ---> [10] ---> NIL
+    ///     bottom # ---> [1] ---> [5] ---> [6] ---> [7] ---> [8] ---> [9] ---> [10] ---> NIL
     ///
     /// As a result the lowest entry is the most dense.
     ///
@@ -34,6 +37,14 @@ struct Node {
 }
 
 impl Node {
+    /// Insert a new node in this node's place, making this node the new node's successor.
+    fn insert(&mut self, new_node: Jar<Node>) {
+        take::replace_with(self, |node| {
+            new_node.next = Some(node);
+            new_node
+        });
+    }
+
     // TODO: Implement `IntoIterator`.
     fn iter(&mut self) -> PoolIter {
         PoolIter {

+ 50 - 40
src/bk/seek.rs

@@ -5,22 +5,25 @@ struct Seek<'a> {
     ///
     /// The lower indexes are the denser layers.
     ///
+    /// # An important note!
+    ///
+    /// It is crucial that the lookback entries point to shortcuts _before_ the target, not to
+    /// shortcuts starting at the target.
+    ///
     /// # Example
     ///
     /// Consider if we search for 8. Now, we move on until we overshoot. The node before the
     /// overshot is a skip (marked with curly braces).
     ///
-    /// ```
-    /// ...
-    /// 2      # ---------------------> {6} ---------------------> [9] -------------> NIL
-    /// 1      # ---------------------> [6] ---> {7} ------------> [9] -------------> NIL
-    /// 0      # ------------> [5] ---> [6] ---> {7} ------------> [9] ---> [10] ---> NIL
-    /// bottom # ---> [1] ---> [5] ---> [6] ---> [7] ---> [8] ---> [9] ---> [10] ---> NIL
-    /// ```
+    ///     ...
+    ///     2      # ---------------------> {6} ---------------------> [9] -------------> NIL
+    ///     1      # ---------------------> [6] ---> {7} ------------> [9] -------------> NIL
+    ///     0      # ------------> [5] ---> [6] ---> {7} ---> [8] ---> [9] ---> [10] ---> NIL
+    ///     bottom # ---> [1] ---> [5] ---> [6] ---> [7] ---> [8] ---> [9] ---> [10] ---> NIL
     ///
-    /// So, the back look of this particular seek is `[6, 7, 7, ...]`.
+    /// So, the lookback of this particular seek is `[6, 7, 7, ...]`.
     // FIXME: Find a more rustic way than raw pointers.
-    back_look: [Pointer<Shortcuts>; LEVELS.0],
+    lookback: [Pointer<Shortcuts>; LEVELS.0],
     /// A pointer to a pointer to the found node.
     ///
     /// This is the node equal to or less than the target. The two layers of pointers are there to
@@ -35,7 +38,7 @@ impl<'a> Seek<'a> {
     #[inline]
     fn update_fat(&mut self, size: block::Size, above: shortcut::Level) {
         // Go from the densest layer and up, to update the fat node values.
-        for i in self.back_look.iter_mut().skip(above) {
+        for i in self.lookback.iter_mut().skip(above) {
             if !i.update_fat(size) {
                 // Short-circuit for performance reasons.
                 break;
@@ -104,9 +107,9 @@ impl<'a> Seek<'a> {
             //     # -5-------------------> [7] -7-> [17] -17---------> [6] -10-----------> NIL
             //     # -5----------> [1] -1-> [7] -7-> [17] -17---------> [6] -6-> [10] -10-> NIL
             //     # -5-> [5] -1-> [1] -1-> [7] -7-> [17] -17---------> [6] -6-> [10] -10-> NIL
-            // Fortunately, we know the back look of this particular seek, so we can simply iterate
+            // Fortunately, we know the lookback of this particular seek, so we can simply iterate
             // and set:
-            for (i, shortcut) in self.back_look.iter().zip(i.next.shortcuts) {
+            for (i, shortcut) in self.lookback.iter().zip(i.next.shortcuts) {
                 // Update the shortcut to skip over the next shortcut. Note that this statement
                 // makes it impossible to shortcut like in `insert`.
                 i.next = shortcut.next;
@@ -135,8 +138,8 @@ impl<'a> Seek<'a> {
     // needed to update it.
     fn update_shortcut(&mut self, lv: shortcut::Level) -> Pointer<Shortcut> {
         // Make the old shortcut point to `self.node`.
-        let old_next = mem::replace(&mut self.back_look[lv].next, Some(self.node));
-        mem::replace(&mut self.back_look[lv], Shortcut {
+        let old_next = mem::replace(&mut self.lookback[lv].next, Some(self.node));
+        mem::replace(&mut self.lookback[lv], Shortcut {
             next: old_next,
             fat: self.node.block.size(),
         });
@@ -156,22 +159,16 @@ impl<'a> Seek<'a> {
             // Check that we're inserting at a fitting position, such that the list is kept sorted.
             debug_assert!(self.node.block < block, "Inserting at a wrong position.");
 
+            // Generate the maximum level of the new node's shortcuts.
+            let height = shortcut::Level::generate();
+
             // Put the old node behind the new node holding block.
-            let mut new_node = arena.alloc(Node {
+            seek.node.insert(arena.alloc(Node {
                 block: block,
                 // Place-holder.
                 shortcuts: Default::default(),
                 next: None,
-            });
-
-            // Generate the maximum level of the new node's shortcuts.
-            let height = shortcut::Level::generate();
-
-            // Insert the new node up front, shifting the rest forward.
-            take::replace_with(seek.node, |node| {
-                new_node.next = Some(node);
-                new_node
-            });
+            }));
 
             // If we actually have a bottom layer (i.e. the generated level is higher than zero),
             // we obviously need to update its fat values based on the main list.
@@ -186,7 +183,7 @@ impl<'a> Seek<'a> {
                 // Calculate the fat value of the bottom layer.
                 let new_fat = seek.node.calculate_fat_value_bottom();
 
-                let skip = &mut seek.back_look[0];
+                let skip = &mut seek.lookback[0];
                 if new_fat == skip.fat {
                     if let Some(next) = skip.next {
                         next.fat = next.calculate_fat_value_bottom();
@@ -230,7 +227,7 @@ impl<'a> Seek<'a> {
                 // optimizations forever.
 
                 // Avoid multiple bound checks.
-                let skip = &mut seek.back_look[lv];
+                let skip = &mut seek.lookback[lv];
                 // The shortcut behind the updated one might be invalidated as well. We use a nifty
                 // trick: If the fat node is not present on one part of the split (defined by the
                 // newly inserted node), it must fall on the other. So, we can shortcut and safely
@@ -290,27 +287,40 @@ impl<'a> Seek<'a> {
                 i.check();
             }
 
-            // Check the back look.
-            let mut iter = self.back_look().peekable();
+            // Make sure that the first lookback entry is overshooting the node as expected.
+            assert!(self.lookback[0].next.and_then(|x| x.block) >= self.node.block, "The first \
+                    lookback entry is not overshooting the node of the seek.");
+
+            // Check the lookback.
+            let mut iter = self.lookback.peekable();
             let mut n = 0;
             loop {
                 let cur = iter.next();
                 let next = iter.peek();
 
-                if let (Some(cur), Some(next)) = (cur, next) {
-                    // The fat value satisfy the heap property, and thus must be ordered as such.
-                    assert!(cur.fat <= next.fat, "The {}'th back look entry has a fat value higher \
-                            than its parent level, which ought to be less dense.", n);
-                    // The next layer should be less dense, as such, the pointer is lower than the
-                    // current one.
-                    assert!(cur.next >= next.next, "The {}'th back look entry's next-node pointer \
-                            is lower than the parent level's pointer, despite that it ought to be \
-                            denser.", n);
-
-                    n += 1;
+                if let Some(cur) = cur {
+                    // Make sure the shortcut doesn't start at the node (this is done by making
+                    // sure the lookback entry and the n'th shortcut of the current node are
+                    // distinct).
+                    assert!(cur.next != self.node.shortcuts[n].next, "The {}'th lookback entry \
+                            starts at the target node.");
+
+                    if let Some(next) = next {
+                        // The fat values satisfy the heap property, and thus must be ordered as such.
+                        assert!(cur.fat <= next.fat, "The {}'th lookback entry has a fat value higher \
+                                than its parent level, which ought to be less dense.", n);
+                        // The next layer should be less dense, as such, the pointer is lower than the
+                        // current one.
+                        assert!(cur.next >= next.next, "The {}'th lookback entry's next-node pointer \
+                                is lower than the parent level's pointer, despite that it ought to be \
+                                denser.", n);
+                    }
                 } else {
                     break;
                 }
+
+                // Increment the counter (go to the next lookback entry).
+                n += 1;
             }
         }
     }

+ 20 - 13
src/bk/shortcut.rs

@@ -16,7 +16,7 @@ impl Level {
     /// needed.
     #[inline]
     fn generate_level() -> Level {
-        // Naturally, the ffz conforms to our wanted probability distribution, $p(x) = 2^{-x}$a. We
+        // Naturally, the ffz conforms to our wanted probability distribution, $$p(x) = 2^{-x}$$. We
         // apply a bit mask to saturate when the ffz is greater than `LEVELS`.
         (random::get() & (1 << LEVELS - 1)).trailing_zeros()
     }
@@ -47,7 +47,7 @@ struct Shortcut {
 impl Shortcut {
     #[inline]
     fn is_null(&self) -> bool {
-        self.next.is_null()
+        self.fat == 0
     }
 
     /// Update the fat value in case the new node is bigger than the current fat node.
@@ -60,7 +60,10 @@ impl Shortcut {
     /// # Short-circuiting
     ///
     /// The returned value indicates if the caller should continue propagating new fat value up.
-    /// This is based on the observation that updating the fat value is similar to heap insertion.
+    /// Propagation stops either because the old fat value is already at least the new size, or
+    /// because the shortcut is null (and thus all higher shortcuts are too).
+    ///
+    /// The former is based on the observation that updating the fat value is similar to heap insertion.
     ///
     /// Consider inserting a block of size 4:
     ///     [6] -6-------------------> ...
@@ -80,17 +83,21 @@ impl Shortcut {
     ///         4
     ///         |
     ///         2
-    /// Let _A_ be a node with set of children _C_, then_ A = max(C)_. As such, if I start in the
-    /// bottom and iterate upwards, as soon as the value stays unchanged, the rest of the values
-    /// won't change either.
+    /// Let $$A$$ be a node with set of children $$C$$, then $$A = \max(C)$$. As such, if I start
+    /// in the bottom and iterate upwards, as soon as the value stays unchanged, the rest of the
+    /// values won't change either.
     #[inline]
-    fn update_fat(&mut self, new_node: block::Size) -> bool {
-        let res = self.fat <= self.node.block.size();
-
-        // We max them with the new block size to ensure they're properly set.
-        self.fat = cmp::max(i.fat, self.node.block.size());
-
-        res
+    fn update_fat(&mut self, new_size: block::Size) -> bool {
+        if self.fat < new_size && !self.is_null() {
+            // The fat value is smaller than the new size and thus an update is required.
+            self.fat = new_size;
+
+            true
+        } else {
+            // Since the old fat value is not smaller than the new size, or the shortcut is null,
+            // we can safely short-circuit (see the notes laid out in the documentation comment).
+            false
+        }
     }
 }
 

+ 0 - 11
src/prelude.rs

@@ -1,11 +0,0 @@
-//! Frequently used imports.
-
-// TODO: Reconsider this. Is this an anti-pattern?
-
-pub use block::Block;
-pub use cell::MoveCell;
-pub use lazy_init::LazyInit;
-pub use leak::Leak;
-pub use ptr::{Pointer, Jar, Uninit};
-pub use sync::Mutex;
-pub use vec::Vec;

+ 7 - 7
src/vec.rs

@@ -5,7 +5,7 @@ use prelude::*;
 use core::{slice, ops, mem, ptr};
 
 // Derive the length newtype.
-usize_newtype!(pub VecElem);
+usize_newtype!(pub Elements);
 
 /// A low-level vector primitive.
 ///
@@ -17,11 +17,11 @@ pub struct Vec<T: Leak> {
     /// The capacity of the buffer.
     ///
     /// This is the number of elements the buffer can hold before reallocation is necessary.
-    cap: VecElem,
+    cap: Elements,
     /// The length of the vector.
     ///
     /// This is the number of elements from the start that are initialized and can be read safely.
-    len: VecElem,
+    len: Elements,
 }
 
 impl<T: Leak> Vec<T> {
@@ -32,7 +32,7 @@ impl<T: Leak> Vec<T> {
     /// This is unsafe, since it won't initialize the buffer in any way, possibly breaking type
     /// safety, memory safety, and so on. Thus, care must be taken upon usage.
     #[inline]
-    pub unsafe fn from_raw_parts(block: Block, len: VecElem) -> Vec<T> {
+    pub unsafe fn from_raw_parts(block: Block, len: Elements) -> Vec<T> {
         Vec {
             len: len,
             cap: block.size() / mem::size_of::<T>(),
@@ -77,7 +77,7 @@ impl<T: Leak> Vec<T> {
 
     /// Get the capacity of this vector.
     #[inline]
-    pub fn capacity(&self) -> VecElem {
+    pub fn capacity(&self) -> Elements {
         self.cap
     }
 
@@ -128,12 +128,12 @@ impl<T: Leak> Vec<T> {
 
     /// Truncate this vector.
     ///
-    /// This is O(1).
+    /// This is $$O(1)$$.
     ///
     /// # Panics
     ///
     /// Panics on out-of-bound.
-    pub fn truncate(&mut self, len: VecElem) {
+    pub fn truncate(&mut self, len: Elements) {
         // Bound check.
         assert!(len <= self.len, "Out of bound.");