Эх сурвалжийг харах

More last minute interface changes

jD91mZM2 6 жил өмнө
parent
commit
eefcccb431
4 өөрчлөгдсөн 56 нэмэгдсэн , 70 устгасан
  1. 1 1
      README.md
  2. 7 6
      src/compile.rs
  3. 1 2
      src/lib.rs
  4. 47 61
      src/matcher.rs

+ 1 - 1
README.md

@@ -3,4 +3,4 @@
 A library for parsing POSIX regular expressions. Only supports ASCII.  Created
 for use in relibc and does not require the std.
 
-Currently only supports enhanced regex.
+Currently only supports basic enhanced regex.

+ 7 - 6
src/compile.rs

@@ -4,9 +4,10 @@
 #[cfg(feature = "no_std")]
 use std::prelude::*;
 
+use std::borrow::Cow;
 use std::collections::HashMap;
 use std::fmt;
-use ::{ctype, PosixRegex};
+use {ctype, PosixRegex};
 
 /// Repetition bounds, for example + is (1, None), and ? is (0, Some(1))
 #[derive(Clone, Copy, PartialEq, Eq)]
@@ -20,7 +21,7 @@ impl fmt::Debug for Range {
     }
 }
 
-/// An item inside square brackets, like [abc] or [[:digit:]]
+/// An item inside square brackets, like `[abc]` or `[[:digit:]]`
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub enum Collation {
     Char(u8),
@@ -98,12 +99,12 @@ impl<'a> PosixRegexBuilder<'a> {
             classes: HashMap::new()
         }
     }
-    /// Add a custom collation class, for use within square brackets (such as [[:digit:]])
+    /// Add a custom collation class, for use within square brackets (such as `[[:digit:]]`)
     pub fn with_class(mut self, name: &'a [u8], callback: fn(u8) -> bool) -> Self {
         self.classes.insert(name, callback);
         self
     }
-    /// Add all the default collation classes, like [[:digit:]] and [[:alnum:]]
+    /// Add all the default collation classes, like `[[:digit:]]` and `[[:alnum:]]`
     pub fn with_default_classes(mut self) -> Self {
         #[cfg(not(feature = "no_std"))]
         self.classes.reserve(12);
@@ -124,9 +125,9 @@ impl<'a> PosixRegexBuilder<'a> {
         self
     }
     /// "Compile" this regex to a struct ready to match input
-    pub fn compile(&mut self) -> Result<PosixRegex, Error> {
+    pub fn compile(&mut self) -> Result<PosixRegex<'static>, Error> {
         let search = self.compile_tokens()?;
-        Ok(PosixRegex::new(search))
+        Ok(PosixRegex::new(Cow::Owned(search)))
     }
 
     fn consume(&mut self, amount: usize) {

+ 1 - 2
src/lib.rs

@@ -13,8 +13,7 @@ mod std {
     pub use core::*;
 
     pub mod collections {
-        pub use alloc::collections::*;
-        pub use alloc::collections::BTreeMap as HashMap;
+        pub use alloc::btree_map::BTreeMap as HashMap;
     }
     pub mod prelude {
         pub use alloc::borrow::ToOwned;

+ 47 - 61
src/matcher.rs

@@ -3,26 +3,26 @@
 #[cfg(feature = "no_std")]
 use std::prelude::*;
 
-use ::ctype;
 use compile::{Token, Range};
+use ctype;
 use std::borrow::Cow;
 use std::rc::Rc;
 use std::{fmt, mem};
 
 /// A regex matcher, ready to match stuff
 #[derive(Clone)]
-pub struct PosixRegex {
-    branches: Vec<Vec<(Token, Range)>>,
+pub struct PosixRegex<'a> {
+    branches: Cow<'a, [Vec<(Token, Range)>]>,
     case_insensitive: bool,
     newline: bool,
     no_start: bool,
     no_end: bool
 }
-impl PosixRegex {
+impl<'a> PosixRegex<'a> {
     /// Create a new matcher instance from the specified alternations. This
     /// should probably not be used and instead an instance should be obtained
     /// from `PosixRegexBuilder`, which also compiles a string into regex.
-    pub fn new(branches: Vec<Vec<(Token, Range)>>) -> Self {
+    pub fn new(branches: Cow<'a, [Vec<(Token, Range)>]>) -> Self {
         Self {
             branches,
             case_insensitive: false,
@@ -59,7 +59,7 @@ impl PosixRegex {
     }
     /// Match the string starting at the current position. This does not find
     /// substrings.
-    pub fn matches_exact(&self, input: &[u8]) -> Option<PosixRegexResult> {
+    pub fn matches_exact(&self, input: &[u8]) -> Option<Vec<(usize, usize)>> {
         let mut groups = Vec::new();
         let mut matcher = PosixRegexMatcher {
             base: self,
@@ -71,20 +71,16 @@ impl PosixRegex {
             .filter_map(|tokens| Branch::new(tokens))
             .collect();
 
-        let start = matcher.offset;
+        matcher.groups.push((matcher.offset, 0));
         if !matcher.matches_exact(branches) {
             return None;
         }
-        let end = matcher.offset;
+        groups[0].1 = matcher.offset;
 
-        Some(PosixRegexResult {
-            start,
-            end,
-            groups
-        })
+        Some(groups)
     }
-    /// Match any substrings in the string
-    pub fn matches(&self, input: &[u8]) -> Vec<PosixRegexResult> {
+    /// Match any substrings in the string, but optionally no more than `max`
+    pub fn matches(&self, input: &[u8], mut max: Option<usize>) -> Vec<Vec<(usize, usize)>> {
         let mut groups = Vec::new();
         let mut matcher = PosixRegexMatcher {
             base: self,
@@ -93,26 +89,18 @@ impl PosixRegex {
             groups: &mut groups
         };
 
-        let branches: Vec<_> = self.branches.iter()
-            .map(|tokens| {
-                let mut new = Vec::with_capacity(1 + tokens.len());
-                new.push((Token::InternalStart, Range(0, None)));
-                new.extend_from_slice(tokens);
-                new
-            })
-            .collect();
+        let tokens = vec![
+            (Token::InternalStart, Range(0, None)),
+            (Token::Group(self.branches.to_vec()), Range(1, Some(1)))
+        ];
+        let branches = vec![
+            Branch::new(&tokens).unwrap()
+        ];
 
         let mut matches = Vec::new();
-        while matcher.matches_exact({
-            branches.iter()
-                .filter_map(|tokens| Branch::new(tokens))
-                .collect()
-        }) {
-            matches.push(PosixRegexResult {
-                start: 0,
-                end: matcher.offset,
-                groups: mem::replace(matcher.groups, Vec::new())
-            });
+        while max.map(|max| max > 0).unwrap_or(true) && matcher.matches_exact(branches.clone()) {
+            matches.push(mem::replace(matcher.groups, Vec::new()));
+            max = max.map(|max| max - 1);
         }
         matches
     }
@@ -260,7 +248,7 @@ impl<'a> Branch<'a> {
             ..self.clone()
         })
     }
-    fn add_repeats(&self, branches: &mut Vec<Branch<'a>>) {
+    fn add_repeats(&self, branches: &mut Vec<Branch<'a>>, offset: usize) {
         if self.repeat_max.map(|max| max == 0).unwrap_or(false) {
             return;
         }
@@ -271,7 +259,9 @@ impl<'a> Branch<'a> {
             (Token::Group(ref repeats), _) => {
                 for alternative in 0..repeats.len() {
                     let mut path = self.path.clone();
-                    path.last_mut().unwrap().variant = alternative;
+                    let last = path.last_mut().unwrap();
+                    last.start = offset;
+                    last.variant = alternative;
 
                     branches.push(Self {
                         index: 0,
@@ -311,7 +301,7 @@ impl<'a> Branch<'a> {
 }
 
 struct PosixRegexMatcher<'a> {
-    base: &'a PosixRegex,
+    base: &'a PosixRegex<'a>,
     input: &'a [u8],
     offset: usize,
     groups: &'a mut Vec<(usize, usize)>
@@ -352,7 +342,7 @@ impl<'a> PosixRegexMatcher<'a> {
                 if let Some(next) = branch.next_branch() {
                     insert.push(next);
                 }
-                branch.add_repeats(&mut insert);
+                branch.add_repeats(&mut insert, self.offset);
             }
         }
 
@@ -490,17 +480,6 @@ impl<'a> PosixRegexMatcher<'a> {
     }
 }
 
-/// A single result, including start and end bounds
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct PosixRegexResult {
-    /// An offset in the input string to where the match started (inclusive)
-    pub start: usize,
-    /// An offset in the input string to where the match ended (exclusive)
-    pub end: usize,
-    /// Start/end offsets in the input to where a group matched
-    pub groups: Vec<(usize, usize)>
-}
-
 #[cfg(test)]
 mod tests {
     #[cfg(feature = "bench")]
@@ -518,12 +497,11 @@ mod tests {
             .compile()
             .expect("error compiling regex")
     }
-    #[cfg(feature = "bench")]
-    fn matches(regex: &str, input: &str) -> Vec<PosixRegexResult> {
+    fn matches(regex: &str, input: &str) -> Vec<Vec<(usize, usize)>> {
         compile(regex)
-            .matches(input.as_bytes())
+            .matches(input.as_bytes(), None)
     }
-    fn matches_exact(regex: &str, input: &str) -> Option<PosixRegexResult> {
+    fn matches_exact(regex: &str, input: &str) -> Option<Vec<(usize, usize)>> {
         compile(regex)
             .matches_exact(input.as_bytes())
     }
@@ -578,39 +556,47 @@ mod tests {
     fn offsets() {
         assert_eq!(
             matches_exact("abc", "abcd"),
-            Some(PosixRegexResult { start: 0, end: 3, groups: vec![] })
+            Some(vec![(0, 3)])
         );
         assert_eq!(
             matches_exact(r"[[:alpha:]]\+", "abcde12345"),
-            Some(PosixRegexResult { start: 0, end: 5, groups: vec![] })
+            Some(vec![(0, 5)])
         );
         assert_eq!(
             matches_exact(r"a\(bc\)\+d", "abcbcd"),
-            Some(PosixRegexResult { start: 0, end: 6, groups: vec![(1, 5)] })
+            Some(vec![(0, 6), (3, 5)])
         );
         assert_eq!(
             matches_exact(r"hello\( \(world\|universe\) :D\)\?!", "hello world :D!"),
-            Some(PosixRegexResult { start: 0, end: 15, groups: vec![(5, 14), (6, 11)] })
+            Some(vec![(0, 15), (5, 14), (6, 11)])
         );
         assert_eq!(
             matches_exact(r"hello\( \(world\|universe\) :D\)\?", "hello world :D"),
-            Some(PosixRegexResult { start: 0, end: 14, groups: vec![(5, 14), (6, 11)] })
+            Some(vec![(0, 14), (5, 14), (6, 11)])
         );
         assert_eq!(
             matches_exact(r"\(\<hello\>\) world", "hello world"),
-            Some(PosixRegexResult { start: 0, end: 11, groups: vec![(0, 5)] })
+            Some(vec![(0, 11), (0, 5)])
         );
         assert_eq!(
             matches_exact(r".*d", "hid howd ared youd"),
-            Some(PosixRegexResult { start: 0, end: 18, groups: vec![] })
+            Some(vec![(0, 18)])
         );
         assert_eq!(
             matches_exact(r".*\(a\)", "bbbbba"),
-            Some(PosixRegexResult { start: 0, end: 6, groups: vec![(5, 6)] })
+            Some(vec![(0, 6), (5, 6)])
         );
         assert_eq!(
             matches_exact(r"\(a \(b\) \(c\)\) \(d\)", "a b c d"),
-            Some(PosixRegexResult { start: 0, end: 7, groups: vec![(0, 5), (2, 3), (4, 5), (6, 7)] })
+            Some(vec![(0, 7), (0, 5), (2, 3), (4, 5), (6, 7)])
+        );
+        assert_eq!(
+            matches_exact(r"\(.\)*", "hello"),
+            Some(vec![(0, 5), (4, 5)])
+        );
+        assert_eq!(
+            matches("hi", "hello hi lol"),
+            vec!(vec![(6, 8)])
         );
     }
     #[test]