seek.rs 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. /// A "seek".
  2. ///
  3. /// Seek represents the a found node ("needle") and backtracking information ("lookback").
  4. struct Seek<'a> {
  5. /// The last node below our target for each level.
  6. ///
  7. /// This is used for insertions and other backtracking.
  8. ///
  9. /// The lower indexes are the denser layers.
  10. ///
  11. /// # An important note!
  12. ///
  13. /// It is crucial that the backlook is pointers to nodes _before_ the target, not the target
  14. /// node itself.
  15. ///
  16. /// # Example
  17. ///
  18. /// Consider if we search for 8. Now, we move on until we overshoot. The node before the
  19. /// overshot is a lookback entry (marked with curly braces).
  20. ///
  21. /// ...
  22. /// 2 # ---------------------> {6} ---------------------> [9] -------------> NIL
  23. /// 1 # ---------------------> [6] ---> {7} ------------> [9] -------------> NIL
  24. /// 0 # ------------> [5] ---> [6] ---> {7} ---> [8] ---> [9] ---> [10] ---> NIL
  25. /// bottom # ---> [1] ---> [5] ---> [6] ---> [7] ---> [8] ---> [9] ---> [10] ---> NIL
  26. ///
  27. /// So, the lookback of this particular seek is `[6, 7, 7, ...]`.
  28. // FIXME: Find a more rustic way than raw pointers.
  29. lookback: lv::Array<Pointer<Node>>,
  30. /// A pointer to a pointer to the found node.
  31. ///
  32. /// This is the node equal to or less than the target. The two layers of pointers are there to
  33. /// make it possible to modify the links and insert nodes before our target.
  34. node: &'a mut Jar<Node>,
  35. }
  36. impl<'a> Seek<'a> {
  37. /// Update the fat values of this seek to be higher than or equal to some new block size. .
  38. ///
  39. /// This will simply run over and update the fat values of shortcuts of some level and above
  40. /// with a new size.
  41. ///
  42. /// Note that this cannot be used to remove a block, since that might decrease some fat values.
  43. #[inline]
  44. fn increase_fat(&mut self, size: block::Size, from: lv::Level) {
  45. // Start after `above` and go up, to update the fat node values.
  46. for i in &mut self.skips_from(above) {
  47. if !i.increase_fat(size) {
  48. // Short-circuit for performance reasons.
  49. break;
  50. }
  51. }
  52. }
  53. /// Get the `lv`'th "skip" of this seek.
  54. ///
  55. /// The $$n$$'th shortcut of the $$n$$'th entry of the lookback is referred to as the $$n$$'th
  56. /// "skip", because it _skips_ over the target node.
  57. fn get_skip(&mut self, lv: lv::Level) -> &mut Shortcut {
  58. &mut self.lookback[lv].shorcuts[lv]
  59. }
  60. /// Create an iterator over all of the skips of this seek.
  61. fn skips(&mut self) -> impl Iterator<Item = &mut Shortcut> {
  62. self.skips_from(Level::min())
  63. }
  64. /// Create an iterator over the skips of this seek starting at some level.
  65. fn skips_from(&mut self, lv: lv::Level) -> impl Iterator<Item = &mut Shortcut> {
  66. Skips {
  67. seek: self,
  68. n: lv::Iter::start_at(lv),
  69. }
  70. }
  71. fn put(&mut self, block: Block, arena: &mut Arena<Node>) {
  72. if self.node.block.merge_right(block).is_ok() {
  73. // Merge suceeded:
  74. // [==block==]
  75. // [==self==] [==rest==]
  76. // is now:
  77. // [==self=============] [==rest==]
  78. // Update the fat values.
  79. self.increase_fat(self.node.block.size(), Level::min());
  80. // If our configuration now looks like:
  81. // [==self=============][==rest==]
  82. // We need to maximize the former level, so we merge right.
  83. if self.try_merge_right(arena).is_ok() { return; }
  84. // Note that we do not merge our block to the seeked block from the left side. This is due
  85. // to the seeked block being strictly less than our target, and thus we go one forward to
  86. // the next block to see if it is mergable by its left side.
  87. } else if self.node.next.and_then(|x| block.merge_right(x)).is_ok() {
  88. // Merge suceeded:
  89. // [==block==]
  90. // [==self==] [==right==]
  91. // is now:
  92. // [==self==] [==right=============]
  93. // Update the fat values.
  94. self.increase_fat(block.size(), Level::min());
  95. // In case that our new configuration looks like:
  96. // [==self==][==right=============]
  97. // We need to merge the former block right:
  98. self.try_merge_right(arena);
  99. } else {
  100. // We were unabled to merge it with a preexisting block, so we need to insert it
  101. // instead.
  102. self.insert_no_merge(block, arena);
  103. }
  104. }
  105. fn try_merge_right(&mut self, arena: &mut Arena<Node>) {
  106. if self.node.block.merge_right(self.node.next.block).is_ok() {
  107. // We merged our node left. This means that the node is simply extended and an empty
  108. // block will be left right to the node. As such the fat node's size is greater than or
  109. // equal to the new, merged node's size. So we need not full reevaluation of the fat
  110. // values, instead we can simply climb upwards and max the new size together.
  111. for (i, shortcut) in self.skips().zip(i.next.shortcuts) {
  112. // Update the shortcut to skip over the next shortcut. Note that this statements
  113. // makes it impossible to shortcut like in `insert`.
  114. i.next = shortcut.next;
  115. // Update the fat value to make sure it is greater or equal to the new block size.
  116. // Note that we do not short-circuit -- on purpose -- due to the above statement
  117. // being needed for correctness.
  118. i.increase_fat(self.node.block.size(), Level::min());
  119. // TODO: Consider breaking this loop into two loops to avoid too many fat value
  120. // updates.
  121. }
  122. // Finally, replace the useless node, and free it to the arena.
  123. arena.free(mem::replace(self.node.next, self.node.next.unwrap().next));
  124. }
  125. }
  126. // Put a new shortcut starting at the current node at some level.
  127. //
  128. // This will simply insert a shortcut on level `lv`, spanning the old shortcut to `self.node`.
  129. //
  130. // The old shortcut is updated to point to a shortcut representing `self.node`, but the fat
  131. // value is kept as-is.
  132. //
  133. // The new shortcut's fat value will be set to zero, and recomputation is likely needed to
  134. // update it.
  135. //
  136. // # Illustrated
  137. //
  138. // We go from:
  139. // [self.lookback[lv]] -f-> [self.lookback[lv].next]
  140. // To:
  141. // [self.lookback[lv]] -f-> [self.node] -0-> [old self.lookback[lv].next]
  142. fn insert_shortcut(&mut self, lv: lv::Level) -> Pointer<Shortcut> {
  143. debug_assert!(self.node.shortcuts[lv].is_null(), "Overwriting a non-null shortcut.");
  144. // Make the old shortcut point to `self.node`.
  145. let old_next = mem::replace(&mut self.get_skip(lv).next, Some(self.node));
  146. // Update the shortcut of our node to point to the old shortcut's next node.
  147. self.node.shortcuts[lv] = Shortcut {
  148. next: old_next,
  149. fat: block::Size(0),
  150. };
  151. }
  152. /// Insert a block (no merge) _after_ the found node.
  153. ///
  154. /// This will simply insert (place) the node after our found node, without merges. The fat
  155. /// values are updated.
  156. fn insert_no_merge(&mut self, block: Block, arena: &mut Arena<Node>) {
  157. take::replace_with(self, |mut seek| {
  158. // Make sure that there are no adjacent blocks.
  159. debug_assert!(!self.node.left_to(&block), "Inserting left-adjacent block without \
  160. merge.");
  161. debug_assert!(!self.node.next.map_or(false, |x| x.left_to(&block)), "Inserting \
  162. right-adjacent block without merge.");
  163. // Check that we're inserting at a fitting position, such that the list is kept sorted.
  164. debug_assert!(self.node.block < block, "Inserting at a wrong position.");
  165. // Put the old node behind the new node holding block.
  166. seek.node.insert(arena.alloc(Node {
  167. block: block,
  168. // Place-holder.
  169. shortcuts: Default::default(),
  170. next: None,
  171. }));
  172. // Generate the maximum level of the new node's shortcuts.
  173. if let Some(max_lv) = lv::Level::generate() {
  174. // If we actually have a bottom layer (i.e. the generated level is higher than zero),
  175. // we obviously need to update its fat values based on the main list.
  176. // FIXME: This code is copied from the loop below. Find a way to avoid repeating.
  177. // Place the node into the bottom shortcut.
  178. self.insert_shortcut(lv::Level::min());
  179. // For details how this works, see the loop below. This is really just taken from
  180. // the iteration from that to reflect the special structure of the block list.
  181. // Calculate the fat value of the bottom layer.
  182. let new_fat = seek.node.calculate_fat_value_bottom();
  183. let skip = &mut seek.get_skip(lv::Level::min());
  184. if new_fat == skip.fat {
  185. if let Some(next) = skip.next {
  186. next.fat = next.calculate_fat_value_bottom();
  187. }
  188. } else {
  189. skip.node.shortcuts[0].increase_fat(mem::replace(&mut skip.fat, new_fat));
  190. }
  191. // Place new shortcuts up to this level.
  192. for lv in lv::Iter::non_bottom().to(max_lv) {
  193. // Place the node inbetween the shortcuts.
  194. seek.insert_shortcut(lv);
  195. // The configuration (at level `lv`) should now look like:
  196. // [seek.node] --> [old self.node] --> [old shortcut]
  197. // Since we inserted a new shortcut here, we might have invalidated the old fat
  198. // value, so we need to recompute the fat value. Fortunately, since we go from
  199. // densest to opaquer layer, we can base the fat value on the values of the
  200. // previous layer.
  201. // An example state could look like this: Assume we go from this configuration:
  202. // [a] -y----------> [b] ---> ...
  203. // [a] -x----------> [b] ---> ...
  204. // ...
  205. // To this:
  206. // [a] -y'---------> [b] ---> ...
  207. // [a] -p-> [n] -q-> [b] ---> ...
  208. // ...
  209. // Here, you cannot express _p_ and _q_ in terms of _x_ and _y_. Instead, we need
  210. // to compute them from the layer below.
  211. let new_fat = seek.node.calculate_fat_value_non_bottom(lv);
  212. // You have been visitied by the silly ferris.
  213. // () ()
  214. // \/\/\/
  215. // &_^^_&
  216. // \ /
  217. // Fix all bugs in 0.9345 seconds, or you will be stuck doing premature
  218. // optimizations forever.
  219. // Avoid multiple bound checks.
  220. let skip = &mut seek.get_skip(lv);
  221. // The shortcut behind the updated one might be invalidated as well. We use a nifty
  222. // trick: If the fat node is not present on one part of the split (defined by the
  223. // newly inserted node), it must fall on the other. So, we can shortcut and safely
  224. // skip computation of the second part half of the time.
  225. if new_fat == skip.fat {
  226. if let Some(next) = skip.next {
  227. // The fat value was left unchanged. This means that we need to recompute the
  228. // next segment's fat value.
  229. next.fat = next.calculate_fat_value_non_bottom(lv);
  230. }
  231. // Since it was unchanged, there is no need for setting the value to what it
  232. // already is!
  233. } else {
  234. // The first segment did not contain the fat node! This means that we know a
  235. // priori that the second segment contains the old fat node, i.e. has either
  236. // the same fat value or the updated node is itself the fat node. So, we
  237. // replace the new fat value and update the next segment with the old one. As
  238. // an example, suppose the configuration looked like:
  239. // [a] -f----------> [b]
  240. // And we inserted a new node $x$:
  241. // [a] -g-> [x] -h-> [b]
  242. // Since the fat node (size $f$) couldn't be found on the left side ($g$) of
  243. // $x$, it must be on the right side ($h ≥ f$). It might not be equal to it,
  244. // because the size of $x$ could be greater than $f$, so we take the maximal of
  245. // $x$ and $f$ and assigns that to $h$. This way we avoid having to iterate
  246. // over all the nodes between $x$ and $b$.
  247. skip.next.unwrap().shortcuts[lv]
  248. .increase_fat(cmp::max(mem::replace(&mut skip.fat, new_fat), seek.node.block.size()));
  249. }
  250. }
  251. // The levels above the inserted shortcuts need to get there fat value updated, since a
  252. // new node was inserted below. Since we only inserted (i.e. added a potentially new
  253. // fat node), we simply need to max the old (unupdated) fat values with the new block's
  254. // size.
  255. if let Some(above) = max_lv.above() {
  256. seek.increase_fat(seek.node.block.size(), above);
  257. }
  258. }
  259. seek.node.check();
  260. seek
  261. });
  262. self.check();
  263. }
  264. fn remove(self) -> Jar<Node> {
  265. // Remove the shortcuts that skips the target node (exclude the node from the skip of every
  266. // level). This is in place to make sure there's no dangling pointers after.
  267. for (_, skip) in self.node.shortcuts().zip(self.skips()) {
  268. // Jump over the skip's next node (note how the shortcut will never start at
  269. // `self.node` and hence always be the one we remove here). We can safely unwrap this,
  270. // because we know that `self.node` is at least of this height, by the zip iterator
  271. // adapter.
  272. skip.next = skip.next.unwrap().next;
  273. }
  274. // Update the fat values to reflect the new state.
  275. // TODO: This can maybe be done without indexes.
  276. for lv in lv::Iter::all() {
  277. if self.lookback[lv].shortcuts[lv].fat == self.lookback[lv].block.size() {
  278. // Recalculate the fat value.
  279. let old_fat = self.lookback[lv].shortcuts[lv].fat;
  280. self.lookback[lv].shortcuts[lv].fat = i.calculate_fat_value_non_bottom(lv);
  281. if old_fat == self.lookback[lv].fat {
  282. // The fat value was unchanged (i.e. there are multiple fat nodes), so we
  283. // shortcircuit, because these duplicates will exist in higher layers too (due
  284. // to the heap property).
  285. // TODO: Is heap property the right term here? It isn't technically simply due
  286. // to the heap property...
  287. break;
  288. }
  289. } else {
  290. // Since the node we're removing is not the same size as the fat node, it cannot be
  291. // a fat node. Due to the heap property of the fat values in the lookback, we can
  292. // shortcircuit (if we didn't remove the fat node in this layer, we didn't either
  293. // on any of the later layers).
  294. break;
  295. }
  296. }
  297. // We use the lowest layer in the lookback to use as offset for our search for the node
  298. // before `self.node`. We need to find said node to avoid having dangling pointers to
  299. // `self.node`.
  300. let before_node = self.lookback[0].iter().take_while(|x| x.block < self.node).last();
  301. // Remove the next link to skip the current node.
  302. before_node.next = self.node.next.take();
  303. self.node
  304. }
  305. /// Check this seek.
  306. ///
  307. /// This is NOOP in release mode.
  308. fn check(&self) {
  309. if cfg!(debug_assertions) {
  310. // Check the nodes.
  311. for i in self.node.iter() {
  312. i.check();
  313. }
  314. // Make sure that the first skip is overshooting the node as expected.
  315. assert!(self.get_skip(0).next.and_then(|x| x.block) >= self.node.block, "The first \
  316. skip is not overshooting the node of the seek.");
  317. // Check the lookback.
  318. let mut iter = self.skips().peekable();
  319. let mut n = 0;
  320. loop {
  321. let cur = iter.next();
  322. let next = iter.peek();
  323. if let Some(cur) = cur {
  324. // Make sure the shortcut doesn't start at the node (this is done by making
  325. // sure the skip and the $n$'th shortcut of the current node are distinct).
  326. assert!(cur.next != self.node.shortcuts[n].next, "The {}'th skip starts at \
  327. the target node.");
  328. if let Some(next) = next {
  329. // The fat value satisfy the heap property, and thus must be ordered as such.
  330. assert!(cur.fat <= next.fat, "The {}'th skip has a fat value higher than \
  331. its parent level, which ought to be less dense.", n);
  332. // The next layer should be less dense, as such, the pointer is lower than the
  333. // current one.
  334. assert!(cur.next >= next.next, "The {}'th skip's next-node pointer is \
  335. lower than the parent level's pointer, despite that it ought to be \
  336. denser.", n);
  337. }
  338. } else {
  339. break;
  340. }
  341. // Increment the counter (go to the next skip).
  342. n += 1;
  343. }
  344. }
  345. }
  346. }
  347. struct Skips<'a> {
  348. seek: &'a Seek<'a>,
  349. levels: lv::Iter,
  350. }
  351. impl<'a> Iterator for Skips<'a> {
  352. type Iter = &'a mut Shortcut;
  353. fn next(&mut self) -> Option<&'a mut Shortcut> {
  354. // Progress the level iterator.
  355. if let Some(lv) = self.levels.next() {
  356. // Return the skip.
  357. Some(self.seek.get_skip(lv))
  358. } else {
  359. None
  360. }
  361. }
  362. }