ソースを参照

Fix infinite loop and zero-width matches

jD91mZM2 6 年前
コミット
253f09b955
2 ファイル変更、28 行追加、11 行削除
  1. 1 0
      src/lib.rs
  2. 27 11
      src/matcher.rs

+ 1 - 0
src/lib.rs

@@ -14,6 +14,7 @@ mod std {
 
     pub mod collections {
         pub use alloc::collections::BTreeMap as HashMap;
+        pub use alloc::collections::BTreeSet as HashSet;
     }
     pub mod prelude {
         pub use alloc::borrow::ToOwned;

+ 27 - 11
src/matcher.rs

@@ -6,6 +6,7 @@ use std::prelude::*;
 use compile::{Token, Range};
 use ctype;
 use std::borrow::Cow;
+use std::collections::HashSet;
 use std::fmt;
 use std::rc::Rc;
 use tree::{*, Node as TreeNode};
@@ -156,9 +157,14 @@ impl<'a> PosixRegex<'a> {
         let tree = vec![Node::new(&tree, tree.root, groups)];
 
         let mut matches = Vec::new();
-        while max.map(|max| max > 0).unwrap_or(true) {
+        while max.map(|max| max > 0).unwrap_or(true) && matcher.offset <= matcher.input.len() {
             match matcher.matches_exact(tree.clone()) {
-                Some(groups) => matches.push(groups),
+                Some(groups) => {
+                    if groups[0].unwrap().0 == groups[0].unwrap().1 {
+                        matcher.offset += 1;
+                    }
+                    matches.push(groups)
+                },
                 None => break
             }
             max = max.map(|max| max - 1);
@@ -328,12 +334,16 @@ struct PosixRegexMatcher<'a> {
     offset: usize
 }
 impl<'a> PosixRegexMatcher<'a> {
-    fn expand<'b>(&mut self, branches: &mut [Node<'b>]) -> Vec<Node<'b>> {
+    fn expand<'b>(&mut self, skip: &mut HashSet<NodeId>, branches: &mut [Node<'b>]) -> Vec<Node<'b>> {
         let mut insert = Vec::new();
 
-        for branch in branches {
+        for branch in &mut *branches {
             branch.update_group_end(self.offset);
 
+            if skip.contains(&branch.node) {
+                continue;
+            }
+
             let node = branch.node();
 
             if let Token::Group(_) = node.token {
@@ -348,7 +358,10 @@ impl<'a> PosixRegexMatcher<'a> {
         }
 
         if !insert.is_empty() {
-            let mut new = self.expand(&mut insert);
+            for branch in &mut *branches {
+                skip.insert(branch.node);
+            }
+            let mut new = self.expand(skip, &mut insert);
             insert.append(&mut new);
         }
         insert
@@ -363,7 +376,7 @@ impl<'a> PosixRegexMatcher<'a> {
         loop {
             let next = self.input.get(self.offset).cloned();
 
-            let mut insert = self.expand(&mut branches);
+            let mut insert = self.expand(&mut HashSet::new(), &mut branches);
             branches.append(&mut insert);
 
             // Handle zero-width stuff
@@ -414,7 +427,7 @@ impl<'a> PosixRegexMatcher<'a> {
                 if insert.is_empty() {
                     break;
                 }
-                let mut insert2 = self.expand(&mut insert);
+                let mut insert2 = self.expand(&mut HashSet::new(), &mut insert);
                 branches.append(&mut insert);
                 branches.append(&mut insert2);
             }
@@ -465,9 +478,7 @@ impl<'a> PosixRegexMatcher<'a> {
                     if branch.is_finished() {
                         let mut add = true;
                         if let Some((new_start, new_end)) = branch.prev[0] {
-                            if new_start == new_end {
-                                add = false;
-                            } else if let Some(previous) = succeeded.as_ref() {
+                            if let Some(previous) = succeeded.as_ref() {
                                 if let Some((prev_start, prev_end)) = previous.prev[0] {
                                     if new_end - new_start <= prev_end - prev_start {
                                         add = false;
@@ -656,9 +667,13 @@ mod tests {
             vec![abox![Some((6, 10)), Some((8, 10))]]
         );
         assert_eq!(
-            matches(r"o*", "helloooooooo woooorld, hooow are you?"),
+            matches(r"o\+", "helloooooooo woooorld, hooow are you?"),
             vec![abox![Some((4, 12))], abox![Some((14, 18))], abox![Some((24, 27))], abox![Some((34, 35))]]
         );
+        assert_eq!(
+            matches(r"z*", "abc"),
+            vec![abox![Some((0, 0))], abox![Some((1, 1))], abox![Some((2, 2))], abox![Some((3, 3))]]
+        );
     }
     #[test]
     fn start_and_end() {
@@ -692,6 +707,7 @@ mod tests {
     }
     #[test]
     fn groups() {
+        assert!(matches_exact(r"\(a*\)*", "aaaaa").is_some());
         assert!(matches_exact(r"\(hello\) world", "hello world").is_some());
         assert!(matches_exact(r"\(a*\|b\|c\)d", "d").is_some());
         assert!(matches_exact(r"\(a*\|b\|c\)d", "aaaad").is_some());