6 年之前 · c8f82a17fb
--- a/README.md
+++ b/README.md
@@ -11,6 +11,3 @@ Regex compiler:
 
				  - Alternative syntax for word boundaries: `[[:<:]]` and `[[:>:]]`
			
 
				  - Short forms of character classes, like `\d`
			
 
				  - Unnamed groups
			
 
				-
			
 
				-Matcher:
			
 
				- - Word boundaries
			
--- a/src/ctype.rs
+++ b/src/ctype.rs
@@ -34,3 +34,7 @@ pub fn is_upper(c: u8) -> bool {
 
				 pub fn is_xdigit(c: u8) -> bool {
			
 
				     is_digit(c) || (c >= b'a' && c <= b'f') || (c >= b'A' && c <= b'F')
			
 
				 }
			
 
				+
			
 
				+pub fn is_word_boundary(c: u8) -> bool {
			
 
				+    !is_alnum(c) && c != b'_'
			
 
				+}
			
--- a/src/matcher.rs
+++ b/src/matcher.rs
@@ -125,7 +125,7 @@ impl<'a> Branch<'a> {
 
				 
			
 
				         loop {
			
 
				             if branch.repeat_min > 0 {
			
 
				-                // Did not repeat enough times!
			
 
				+                // Group did not repeat enough times!
			
 
				                 return false;
			
 
				             }
			
 
				 
			
@@ -227,6 +227,11 @@ impl<'a> PosixRegexMatcher<'a> {
 
				     }
			
 
				 
			
 
				     fn matches_exact(&mut self, mut branches: Vec<Branch>) -> bool {
			
 
				+        // Whether or not any branch, at any point, got fully explored. This
			
 
				+        // means at least one path of the regex successfully completed!
			
 
				+        let mut succeeded = false;
			
 
				+        let mut prev = None;
			
 
				+
			
 
				         loop {
			
 
				             let next = self.input.get(self.offset).cloned();
			
 
				             //println!();
			
@@ -237,10 +242,6 @@ impl<'a> PosixRegexMatcher<'a> {
 
				             let mut insert = self.expand(&branches);
			
 
				             branches.append(&mut insert);
			
 
				 
			
 
				-            // Whether or not any branch got fully explored. This means at
			
 
				-            // least one path of the regex successfully completed!
			
 
				-            let mut happy = false;
			
 
				-
			
 
				             //println!("Branches: {:?}", branches);
			
 
				             loop {
			
 
				                 if index >= branches.len() {
			
@@ -258,22 +259,39 @@ impl<'a> PosixRegexMatcher<'a> {
 
				 
			
 
				                 let mut accepts = true;
			
 
				 
			
 
				-                if let Token::Start = token {
			
 
				-                    // Skip ahead to the next token.
			
 
				-                    match branch.next_branch() {
			
 
				-                        Some(next) => *branch = next,
			
 
				-                        None => {
			
 
				-                            remove += 1;
			
 
				-                            continue;
			
 
				-                        }
			
 
				-                    };
			
 
				-                    let (ref new_token, Range(_, new_max)) = *branch.get_token();
			
 
				-                    token = new_token;
			
 
				-                    max = new_max;
			
 
				-
			
 
				-                    accepts = self.offset == 0;
			
 
				+                loop {
			
 
				+                    match token {
			
 
				+                        Token::Start |
			
 
				+                        Token::WordEnd |
			
 
				+                        Token::WordStart => {
			
 
				+                            // Should be cheap to clone since we already make sure
			
 
				+                            // it's a type that doesn't hold any data.
			
 
				+                            let original = token.clone();
			
 
				+
			
 
				+                            // Skip ahead to the next token.
			
 
				+                            match branch.next_branch() {
			
 
				+                                Some(next) => *branch = next,
			
 
				+                                None => break
			
 
				+                            }
			
 
				+                            let (ref new_token, Range(_, new_max)) = *branch.get_token();
			
 
				+                            token = new_token;
			
 
				+                            max = new_max;
			
 
				+
			
 
				+                            //println!("Or {:?}", token);
			
 
				+
			
 
				+                            accepts = match original {
			
 
				+                                Token::Start => self.offset == 0,
			
 
				+                                Token::WordEnd => next.map(::ctype::is_word_boundary).unwrap_or(true),
			
 
				+                                Token::WordStart => prev.map(::ctype::is_word_boundary).unwrap_or(true),
			
 
				+                                _ => unreachable!()
			
 
				+                            };
			
 
				+                        },
			
 
				+                        _ => break
			
 
				+                    }
			
 
				                 }
			
 
				 
			
 
				+                //println!("Accepts before: {:?}", accepts);
			
 
				+
			
 
				                 accepts = accepts && match *token {
			
 
				                     Token::Any => next.is_some(),
			
 
				                     Token::Char(c) => next == Some(c),
			
@@ -282,10 +300,20 @@ impl<'a> PosixRegexMatcher<'a> {
 
				                     Token::OneOf { invert, ref list } => if let Some(next) = next {
			
 
				                         list.iter().any(|c| c.matches(next)) == !invert
			
 
				                     } else { false },
			
 
				-                    _ => unimplemented!("TODO")
			
 
				+
			
 
				+                    // These will only get called if they are encountered at
			
 
				+                    // EOF, for example "abc\>" or "^". Then we simply want to
			
 
				+                    // return true as to preserve the current `accepts` status.
			
 
				+                    Token::Start |
			
 
				+                    Token::WordEnd |
			
 
				+                    Token::WordStart => true
			
 
				                 };
			
 
				+
			
 
				+                //println!("Accepts after: {:?}", accepts);
			
 
				+
			
 
				                 if !accepts || max.map(|max| branch.repeated >= max).unwrap_or(false) {
			
 
				-                    happy = happy || branch.is_explored();
			
 
				+                    succeeded = succeeded || branch.is_explored();
			
 
				+                    //println!("-> Deleted! Succeeded: {}", succeeded);
			
 
				                     for &id in &*branch.group_ids {
			
 
				                         self.groups[id].1 = self.offset;
			
 
				                     }
			
@@ -298,11 +326,12 @@ impl<'a> PosixRegexMatcher<'a> {
 
				             branches.truncate(end);
			
 
				 
			
 
				             if branches.is_empty() {
			
 
				-                return happy;
			
 
				+                return succeeded;
			
 
				             }
			
 
				 
			
 
				             if next.is_some() {
			
 
				                 self.offset += 1;
			
 
				+                prev = next;
			
 
				             }
			
 
				         }
			
 
				     }
			
@@ -405,6 +434,7 @@ mod tests {
 
				     #[test]
			
 
				     fn start_and_end() {
			
 
				         assert!(matches_exact("^abc$", "abc").is_some());
			
 
				+        assert!(matches_exact("^bcd", "bcde").is_some());
			
 
				         assert!(matches_exact("^bcd", "abcd").is_none());
			
 
				         assert!(matches_exact("abc$", "abc").is_some());
			
 
				         assert!(matches_exact("abc$", "abcd").is_none());
			
@@ -412,6 +442,24 @@ mod tests {
 
				         assert!(matches_exact(r".*\(^\|a\)c", "c").is_some());
			
 
				         assert!(matches_exact(r".*\(^\|a\)c", "ac").is_some());
			
 
				         assert!(matches_exact(r".*\(^\|a\)c", "bc").is_none());
			
 
				+
			
 
				+        // Tests if ^ can be repeated without issues
			
 
				+        assert!(matches_exact(".*^^a", "helloabc").is_none());
			
 
				+        assert!(matches_exact(".*^^a", "abc").is_some());
			
 
				+    }
			
 
				+    #[test]
			
 
				+    fn word_boundaries() {
			
 
				+        assert!(matches_exact(r"hello\>.world", "hello world").is_some());
			
 
				+        assert!(matches_exact(r"hello\>.world", "hello!world").is_some());
			
 
				+        assert!(matches_exact(r"hello\>.world", "hellooworld").is_none());
			
 
				+
			
 
				+        assert!(matches_exact(r"hello.\<world", "hello world").is_some());
			
 
				+        assert!(matches_exact(r"hello.\<world", "hello!world").is_some());
			
 
				+        assert!(matches_exact(r"hello.\<world", "hellooworld").is_none());
			
 
				+
			
 
				+        assert!(matches_exact(r".*\<hello\>", "hihello").is_none());
			
 
				+        assert!(matches_exact(r".*\<hello\>", "hi_hello").is_none());
			
 
				+        assert!(matches_exact(r".*\<hello\>", "hi hello").is_some());
			
 
				     }
			
 
				     #[test]
			
 
				     fn groups() {