浏览代码

Clean up matcher

jD91mZM2 6 年之前
父节点
当前提交
ca7fee9161
共有 1 个文件被更改,包括 16 次插入和 17 次删除
  1. 16 17
      src/matcher.rs

+ 16 - 17
src/matcher.rs

@@ -156,12 +156,10 @@ impl<'a> Branch<'a> {
     }
     fn add_repeats(&self, branches: &mut Vec<Branch<'a>>) {
         if self.repeat_max.map(|max| max == 0).unwrap_or(false) {
-            //println!("Don't add repeats, cuz repeat_max = {:?}", self.repeat_max);
             return;
         }
         if let Some(ref repeats) = self.repeats {
             for branch in &**repeats {
-                //println!("REPEAT!");
                 branches.push(Self {
                     index: 0,
                     repeated: 0,
@@ -188,6 +186,7 @@ impl<'a> PosixRegexMatcher<'a> {
             let (ref token, range) = *branch.get_token();
 
             if let Token::Group(ref inner) = token {
+                // Push the group's inner content as a new branch
                 let group_id = self.groups.len();
                 self.groups.push((self.offset, 0));
 
@@ -197,6 +196,7 @@ impl<'a> PosixRegexMatcher<'a> {
                 ids.extend(&*branch.group_ids);
                 ids.push(group_id);
                 let ids = ids.into();
+
                 for alternation in &*repeats {
                     if let Some(branch) = Branch::group(
                         Rc::clone(&ids),
@@ -205,14 +205,13 @@ impl<'a> PosixRegexMatcher<'a> {
                         Rc::clone(&repeats),
                         branch.next_branch()
                     ) {
-                        //println!("{:?} ---[G Cloned]--> {:?}", token, branch.get_token());
                         insert.push(branch);
                     }
                 }
             }
             if branch.repeated >= range.0 {
+                // Push the next element as a new branch
                 if let Some(next) = branch.next_branch() {
-                    //println!("{:?} ---[Cloned]--> {:?}", token, next.get_token());
                     insert.push(next);
                 }
                 branch.add_repeats(&mut insert);
@@ -220,6 +219,7 @@ impl<'a> PosixRegexMatcher<'a> {
         }
 
         if !insert.is_empty() {
+            // Resolve recursively
             let mut new = self.expand(&insert);
             insert.append(&mut new);
         }
@@ -234,7 +234,6 @@ impl<'a> PosixRegexMatcher<'a> {
 
         loop {
             let next = self.input.get(self.offset).cloned();
-            //println!();
 
             let mut index = 0;
             let mut remove = 0;
@@ -242,12 +241,13 @@ impl<'a> PosixRegexMatcher<'a> {
             let mut insert = self.expand(&branches);
             branches.append(&mut insert);
 
-            //println!("Branches: {:?}", branches);
             loop {
                 if index >= branches.len() {
                     break;
                 }
                 if remove > 0 {
+                    // Just like Rust's `retain` function, shift all elements I
+                    // want to keep back and `truncate` when I'm done.
                     branches.swap(index, index-remove);
                 }
                 let branch = &mut branches[index-remove];
@@ -255,10 +255,10 @@ impl<'a> PosixRegexMatcher<'a> {
 
                 let (ref token, Range(_, mut max)) = *branch.get_token();
                 let mut token = token;
-                //println!("Does {:?} match {:?}?", token, next.map(|c| c as char));
 
                 let mut accepts = true;
 
+                // Step 1: Handle zero-width stuff like ^ and \<
                 loop {
                     match token {
                         Token::Start |
@@ -277,8 +277,6 @@ impl<'a> PosixRegexMatcher<'a> {
                             token = new_token;
                             max = new_max;
 
-                            //println!("Or {:?}", token);
-
                             accepts = match original {
                                 Token::Start => self.offset == 0,
                                 Token::WordEnd => next.map(::ctype::is_word_boundary).unwrap_or(true),
@@ -290,8 +288,7 @@ impl<'a> PosixRegexMatcher<'a> {
                     }
                 }
 
-                //println!("Accepts before: {:?}", accepts);
-
+                // Step 2: Check if the token matches
                 accepts = accepts && match *token {
                     Token::Any => next.is_some(),
                     Token::Char(c) => next == Some(c),
@@ -302,24 +299,23 @@ impl<'a> PosixRegexMatcher<'a> {
                     } else { false },
 
                     // These will only get called if they are encountered at
-                    // EOF, for example "abc\>" or "^". Then we simply want to
-                    // return true as to preserve the current `accepts` status.
+                    // EOF (because next_branch returns None), for example
+                    // "abc\>" or "^". Then we simply want to return true so as
+                    // to preserve the current `accepts` status.
                     Token::Start |
                     Token::WordEnd |
                     Token::WordStart => true
                 };
 
-                //println!("Accepts after: {:?}", accepts);
-
                 if !accepts || max.map(|max| branch.repeated >= max).unwrap_or(false) {
                     succeeded = succeeded || branch.is_explored();
-                    //println!("-> Deleted! Succeeded: {}", succeeded);
                     for &id in &*branch.group_ids {
                         self.groups[id].1 = self.offset;
                     }
                     remove += 1;
                     continue;
                 }
+
                 branch.repeated += 1;
             }
             let end = branches.len() - remove;
@@ -354,7 +350,6 @@ mod tests {
     use ::PosixRegexBuilder;
 
     fn matches_exact(regex: &str, input: &str) -> Option<PosixRegexResult> {
-        //println!("----- TRYING TO MATCH {:?} AND {:?}", regex, input);
         PosixRegexBuilder::new(regex.as_bytes())
             .with_default_classes()
             .compile()
@@ -430,6 +425,10 @@ mod tests {
             matches_exact(r"hello\( \(world\|universe\) :D\)\?", "hello world :D"),
             Some(PosixRegexResult { start: 0, end: 14, groups: vec![(5, 14), (6, 11)] })
         );
+        assert_eq!(
+            matches_exact(r"\(\<hello\>\) world", "hello world"),
+            Some(PosixRegexResult { start: 0, end: 11, groups: vec![(0, 5)] })
+        );
     }
     #[test]
     fn start_and_end() {