Jelajahi Sumber

Use less RCs in matcher

jD91mZM2 6 tahun lalu
induk
melakukan
4014cb7b8a
1 mengubah file dengan 84 tambahan dan 69 penghapusan
  1. 84 69
      src/matcher.rs

+ 84 - 69
src/matcher.rs

@@ -4,9 +4,8 @@
 use std::prelude::*;
 
 use compile::{Token, Range};
-use std::borrow::{Borrow, Cow};
+use std::borrow::Cow;
 use std::fmt;
-use std::ops::Deref;
 use std::rc::Rc;
 
 /// A regex matcher, ready to match stuff
@@ -38,39 +37,16 @@ impl PosixRegex {
     }
 }
 
-enum CowRc<'a, B: ToOwned + ?Sized> {
-    Borrowed(&'a B),
-    Owned(Rc<<B as ToOwned>::Owned>)
-}
-impl<'a, B: ToOwned + ?Sized> Clone for CowRc<'a, B> {
-    fn clone(&self) -> Self {
-        match self {
-            CowRc::Borrowed(inner) => CowRc::Borrowed(inner),
-            CowRc::Owned(inner) => CowRc::Owned(Rc::clone(inner)),
-        }
-    }
-}
-impl<'a, B: ToOwned + ?Sized> Deref for CowRc<'a, B> {
-    type Target = B;
-
-    fn deref(&self) -> &Self::Target {
-        match self {
-            CowRc::Borrowed(borrow) => borrow,
-            CowRc::Owned(owned) => (**owned).borrow()
-        }
-    }
-}
-
 #[derive(Clone)]
 struct Branch<'a> {
     index: usize,
     repeated: u32,
-    tokens: CowRc<'a, [(Token, Range)]>,
+    tokens: &'a [(Token, Range)],
+    path: Box<[(usize, usize)]>,
 
     group_ids: Rc<[usize]>,
     repeat_min: u32,
     repeat_max: Option<u32>,
-    repeats: Option<Rc<Vec<Rc<Vec<(Token, Range)>>>>>,
     next: Option<Rc<Branch<'a>>>
 }
 impl<'a> fmt::Debug for Branch<'a> {
@@ -86,19 +62,20 @@ impl<'a> Branch<'a> {
         Some(Self {
             index: 0,
             repeated: 0,
-            tokens: CowRc::Borrowed(tokens),
+            tokens: tokens,
+            path: Vec::new().into(),
+
             group_ids: Vec::new().into(),
             repeat_min: 0,
             repeat_max: Some(0),
-            repeats: None,
             next: None
         })
     }
     fn group(
+        path: Box<[(usize, usize)]>,
         group_ids: Rc<[usize]>,
-        tokens: Rc<Vec<(Token, Range)>>,
+        tokens: &'a [(Token, Range)],
         range: Range,
-        repeats: Rc<Vec<Rc<Vec<(Token, Range)>>>>,
         next: Option<Branch<'a>>
     ) -> Option<Self> {
         if tokens.is_empty() {
@@ -107,45 +84,50 @@ impl<'a> Branch<'a> {
         Some(Self {
             index: 0,
             repeated: 0,
-            tokens: CowRc::Owned(tokens),
+            tokens,
+            path,
             group_ids,
             repeat_min: range.0.saturating_sub(1),
             repeat_max: range.1.map(|i| i.saturating_sub(1)),
-            repeats: Some(repeats),
             next: next.map(Rc::new)
         })
     }
-    fn get_token(&self) -> &(Token, Range) {
-        &self.tokens[self.index]
-    }
-    /// Returns if this node is "explored" enough times,
-    /// meaning it has repeated as many times as it want to and has nowhere to go next.
-    fn is_explored(&self) -> bool {
-        let mut branch = Cow::Borrowed(self);
-
-        loop {
-            if branch.repeat_min > 0 {
-                // Group did not repeat enough times!
-                return false;
+    fn parent_tokens(&self) -> &[(Token, Range)] {
+        let mut tokens = self.tokens;
+
+        let len = self.path.len();
+        if len > 0 {
+            for &(index, variant) in &self.path[..len-1] {
+                match tokens[index] {
+                    (Token::Group(ref inner), _) => tokens = &inner[variant],
+                    _ => panic!("non-group index in path")
+                }
             }
+        }
 
-            let (_, Range(min, _)) = *branch.get_token();
-            if branch.repeated < min {
-                return false;
-            }
-            match branch.next_branch() {
-                Some(next) => branch = Cow::Owned(next),
-                None => break
+        tokens
+    }
+    fn tokens(&self) -> &[(Token, Range)] {
+        let mut tokens = self.parent_tokens();
+
+        if let Some(&(index, variant)) = self.path.last() {
+            match tokens[index] {
+                (Token::Group(ref inner), _) => tokens = &inner[variant],
+                _ => panic!("non-group index in path")
             }
         }
-        true
+
+        tokens
+    }
+    fn get_token(&self) -> &(Token, Range) {
+        &self.tokens()[self.index]
     }
     fn next_branch(&self) -> Option<Self> {
         if self.repeat_min > 0 {
             // Don't add the next branch until we've repeated this one enough
             return None;
         }
-        if self.index + 1 >= self.tokens.len() {
+        if self.index + 1 >= self.tokens().len() {
             return self.next.as_ref().map(|inner| (**inner).clone());
         }
         Some(Self {
@@ -158,18 +140,49 @@ impl<'a> Branch<'a> {
         if self.repeat_max.map(|max| max == 0).unwrap_or(false) {
             return;
         }
-        if let Some(ref repeats) = self.repeats {
-            for branch in &**repeats {
-                branches.push(Self {
-                    index: 0,
-                    repeated: 0,
-                    repeat_min: self.repeat_min.saturating_sub(1),
-                    repeat_max: self.repeat_max.map(|max| max - 1),
-                    tokens: CowRc::Owned(Rc::clone(branch)),
-                    ..self.clone()
-                });
+
+        let tokens = self.parent_tokens();
+        let &(index, _) = self.path.last().expect("add_repeats called on top level");
+        match tokens[index] {
+            (Token::Group(ref repeats), _) => {
+                for alternative in 0..repeats.len() {
+                    let mut path = self.path.clone();
+                    path.last_mut().unwrap().1 = alternative;
+
+                    branches.push(Self {
+                        index: 0,
+                        path,
+                        repeated: 0,
+                        repeat_min: self.repeat_min.saturating_sub(1),
+                        repeat_max: self.repeat_max.map(|max| max - 1),
+                        ..self.clone()
+                    });
+                }
+            },
+            _ => panic!("non-group index in path")
+        }
+    }
+    /// Returns if this node is "explored" enough times,
+    /// meaning it has repeated as many times as it want to and has nowhere to go next.
+    fn is_explored(&self) -> bool {
+        let mut branch = Cow::Borrowed(self);
+
+        loop {
+            if branch.repeat_min > 0 {
+                // Group did not repeat enough times!
+                return false;
+            }
+
+            let (_, Range(min, _)) = *branch.get_token();
+            if branch.repeated < min {
+                return false;
+            }
+            match branch.next_branch() {
+                Some(next) => branch = Cow::Owned(next),
+                None => break
             }
         }
+        true
     }
 }
 
@@ -190,19 +203,21 @@ impl<'a> PosixRegexMatcher<'a> {
                 let group_id = self.groups.len();
                 self.groups.push((self.offset, 0));
 
-                let repeats = Rc::new(inner.iter().cloned().map(Rc::new).collect());
-
                 let mut ids = Vec::with_capacity(branch.group_ids.len() + 1);
                 ids.extend(&*branch.group_ids);
                 ids.push(group_id);
                 let ids = ids.into();
 
-                for alternation in &*repeats {
+                for alternation in 0..inner.len() {
+                    let mut path = Vec::with_capacity(branch.path.len() + 1);
+                    path.extend(&*branch.path);
+                    path.push((branch.index, alternation));
+
                     if let Some(branch) = Branch::group(
+                        path.into(),
                         Rc::clone(&ids),
-                        Rc::clone(alternation),
+                        branch.tokens,
                         range,
-                        Rc::clone(&repeats),
                         branch.next_branch()
                     ) {
                         insert.push(branch);