6 years ago · 7648b78f45
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,3 @@
 
				+/target
			
 
				+**/*.rs.bk
			
 
				+Cargo.lock
			
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -0,0 +1,10 @@
 
				+[package]
			
 
				+name = "posix-regex"
			
 
				+version = "0.1.0"
			
 
				+authors = ["jD91mZM2 <me@krake.one>"]
			
 
				+
			
 
				+[dependencies]
			
 
				+
			
 
				+[features]
			
 
				+debug = []
			
 
				+no_std = []
			
--- a/LICENSE
+++ b/LICENSE
@@ -0,0 +1,21 @@
 
				+MIT License
			
 
				+
			
 
				+Copyright (c) 2018 jD91mZM2
			
 
				+
			
 
				+Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+of this software and associated documentation files (the "Software"), to deal
			
 
				+in the Software without restriction, including without limitation the rights
			
 
				+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+copies of the Software, and to permit persons to whom the Software is
			
 
				+furnished to do so, subject to the following conditions:
			
 
				+
			
 
				+The above copyright notice and this permission notice shall be included in all
			
 
				+copies or substantial portions of the Software.
			
 
				+
			
 
				+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
			
 
				+SOFTWARE.
			
--- a/README.md
+++ b/README.md
@@ -0,0 +1,16 @@
 
				+# posix-regex
			
 
				+
			
 
				+A WIP library for parsing POSIX regular expressions. Only supports ASCII.
			
 
				+Created for use in relibc; it does not require the standard library (`std`).
			
 
				+
			
 
				+Currently only supports enhanced regex.
			
 
				+
			
 
				+## Known TODOs
			
 
				+
			
 
				+Regex compiler:
			
 
				+ - Unnamed groups
			
 
				+ - Alternative syntax for word boundaries: `[[:<:]]` and `[[:>:]]`
			
 
				+
			
 
				+Matcher:
			
 
				+ - Groups (these are difficult because you can repeat them like any other token)
			
 
				+ - Word boundaries
			
--- a/src/compile.rs
+++ b/src/compile.rs
@@ -0,0 +1,451 @@
 
				+//! The regex "compiler", which parses the regex itself.
			
 
				+//! Produces a matcher ready to match input.
			
 
				+
			
 
				+#[cfg(feature = "no_std")]
			
 
				+use std::prelude::*;
			
 
				+
			
 
				+use std::collections::HashMap;
			
 
				+use std::fmt;
			
 
				+use ::{ctype, PosixRegex};
			
 
				+
			
 
				/// Repetition bounds attached to a token: a minimum count and an optional
				/// maximum, where `None` means "no upper limit".
				///
				/// For example `+` is `Range(1, None)` and `?` is `Range(0, Some(1))`.
				#[derive(Clone, PartialEq, Eq)]
				pub struct Range(pub u32, pub Option<u32>);

				impl fmt::Debug for Range {
				    /// Prints `min..` for an unbounded range and `min..max` otherwise.
				    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				        let Range(min, max) = *self;
				        if let Some(max) = max {
				            write!(f, "{}..{}", min, max)
				        } else {
				            write!(f, "{}..", min)
				        }
				    }
				}
			
 
				+
			
 
				/// One entry of a bracket expression, like the `a` in `[abc]` or the
				/// `[:digit:]` in `[[:digit:]]`.
				#[derive(Clone, Debug, PartialEq, Eq)]
				pub enum Collation {
				    /// A single literal byte
				    Char(u8),
				    /// A character class, stored as the predicate that decides membership
				    Class(fn(u8) -> bool)
				}
				impl Collation {
				    /// Returns `true` if the byte `other` is accepted by this entry
				    pub fn matches(&self, other: u8) -> bool {
				        match self {
				            Collation::Char(expected) => *expected == other,
				            Collation::Class(predicate) => (*predicate)(other)
				        }
				    }
				}
			
 
				+
			
 
				+/// A single "compiled" token, such as a `.` or a character literal
			
 
				+#[derive(Clone, PartialEq, Eq)]
			
 
				+pub enum Token {
			
 
				+    Any,
			
 
				+    Char(u8),
			
 
				+    End,
			
 
				+    Group(Vec<Vec<(Token, Range)>>),
			
 
				+    OneOf {
			
 
				+        invert: bool,
			
 
				+        list: Vec<Collation>
			
 
				+    },
			
 
				+    Start,
			
 
				+    WordEnd,
			
 
				+    WordStart
			
 
				+}
			
 
				+impl fmt::Debug for Token {
			
 
				+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
			
 
				+        match *self {
			
 
				+            Token::Any => write!(f, "."),
			
 
				+            Token::Char(c) => write!(f, "{:?}", c as char),
			
 
				+            Token::End => write!(f, "$"),
			
 
				+            Token::Group(ref inner) => write!(f, "Group({:?})", inner),
			
 
				+            Token::OneOf { invert, ref list } => write!(f, "[invert: {}; {:?}]", invert, list),
			
 
				+            Token::Start => write!(f, "^"),
			
 
				+            Token::WordEnd => write!(f, ">"),
			
 
				+            Token::WordStart => write!(f, "<")
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				/// An error that occurred while compiling the regex
				#[derive(Clone, Debug, PartialEq, Eq)]
				pub enum Error {
				    /// The pattern ended in the middle of a construct
				    EOF,
				    /// `\{` was not followed by a number
				    EmptyRepetition,
				    /// A specific byte was required (first field); the second field is the
				    /// byte that was found instead, or `None` at the end of the pattern
				    Expected(u8, Option<u8>),
				    /// The bounds of a `\{min,max\}` repetition have `min > max`
				    IllegalRange,
				    /// A repetition count does not fit in a `u32`
				    IntegerOverflow,
				    /// A repetition operator had no preceding token to apply to
				    LeadingRepetition,
				    /// The bounds of a `\{` repetition were not followed by `}` or `\}`
				    UnclosedRepetition,
				    /// `\|` or `\)` (the byte after the backslash is stored) outside a group
				    UnexpectedToken(u8),
				    /// `[:name:]` named a class that was not registered on the builder
				    UnknownClass(Vec<u8>),
				    /// Inside brackets, `[` was followed by something other than `.`, `=` or `:`
				    UnknownCollation
				}
				impl fmt::Display for Error {
				    /// Writes a short human-readable description of the error.
				    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				        match *self {
				            Error::EOF => f.write_str("unexpected end of pattern"),
				            Error::EmptyRepetition => f.write_str("repetition bounds are missing a number"),
				            Error::Expected(want, Some(got)) =>
				                write!(f, "expected {:?} but found {:?}", want as char, got as char),
				            Error::Expected(want, None) =>
				                write!(f, "expected {:?} but reached the end of the pattern", want as char),
				            Error::IllegalRange => f.write_str("repetition minimum is greater than its maximum"),
				            Error::IntegerOverflow => f.write_str("repetition count is too large"),
				            Error::LeadingRepetition => f.write_str("repetition operator has nothing to repeat"),
				            Error::UnclosedRepetition => f.write_str("repetition bounds are not closed"),
				            Error::UnexpectedToken(c) =>
				                write!(f, "unexpected \\{} outside of a group", c as char),
				            Error::UnknownClass(ref name) => {
				                // Written byte by byte so no intermediate string is allocated.
				                f.write_str("unknown character class \"")?;
				                for &byte in name {
				                    write!(f, "{}", byte as char)?;
				                }
				                f.write_str("\"")
				            },
				            Error::UnknownCollation => f.write_str("unknown collation syntax inside brackets")
				        }
				    }
				}
			
 
				+
			
 
				+/// A regex builder struct
			
 
				+pub struct PosixRegexBuilder<'a> {
			
 
				+    input: &'a [u8],
			
 
				+    classes: HashMap<&'a [u8], fn(u8) -> bool>
			
 
				+}
			
 
				+impl<'a> PosixRegexBuilder<'a> {
			
 
				+    /// Create a new instance that is ready to parse the regex `input`
			
 
				+    pub fn new(input: &'a [u8]) -> Self {
			
 
				+        Self {
			
 
				+            input,
			
 
				+            classes: HashMap::new()
			
 
				+        }
			
 
				+    }
			
 
				+    /// Add a custom collation class, for use within square brackets (such as [[:digit:]])
			
 
				+    pub fn with_class(mut self, name: &'a [u8], callback: fn(u8) -> bool) -> Self {
			
 
				+        self.classes.insert(name, callback);
			
 
				+        self
			
 
				+    }
			
 
				+    /// Add all the default collation classes, like [[:digit:]] and [[:alnum:]]
			
 
				+    pub fn with_default_classes(mut self) -> Self {
			
 
				+        self.classes.reserve(12);
			
 
				+        self.classes.insert(b"alnum", ctype::is_alnum);
			
 
				+        self.classes.insert(b"alpha", ctype::is_alpha);
			
 
				+        self.classes.insert(b"blank", ctype::is_blank);
			
 
				+        self.classes.insert(b"cntrl", ctype::is_cntrl);
			
 
				+        self.classes.insert(b"digit", ctype::is_digit);
			
 
				+        self.classes.insert(b"graph", ctype::is_graph);
			
 
				+        self.classes.insert(b"lower", ctype::is_lower);
			
 
				+        self.classes.insert(b"print", ctype::is_print);
			
 
				+        self.classes.insert(b"punct", ctype::is_punct);
			
 
				+        self.classes.insert(b"space", ctype::is_space);
			
 
				+        self.classes.insert(b"upper", ctype::is_upper);
			
 
				+        self.classes.insert(b"xdigit", ctype::is_xdigit);
			
 
				+        self
			
 
				+    }
			
 
				+    /// "Compile" this regex to a struct ready to match input
			
 
				+    pub fn compile(&mut self) -> Result<PosixRegex, Error> {
			
 
				+        let search = self.compile_inner(true)?;
			
 
				+        Ok(PosixRegex {
			
 
				+            search
			
 
				+        })
			
 
				+    }
			
 
				+
			
 
				+    fn consume(&mut self, amount: usize) {
			
 
				+        self.input = &self.input[amount..];
			
 
				+    }
			
 
				+    fn take_int(&mut self) -> Result<Option<u32>, Error> {
			
 
				+        let mut out: Option<u32> = None;
			
 
				+        while let Some(&c @ b'0'..=b'9') = self.input.first() {
			
 
				+            self.consume(1);
			
 
				+            out = Some(out.unwrap_or(0)
			
 
				+                .checked_mul(10)
			
 
				+                .and_then(|out| out.checked_add((c - b'0') as u32))
			
 
				+                .ok_or(Error::IntegerOverflow)?);
			
 
				+        }
			
 
				+        Ok(out)
			
 
				+    }
			
 
				+    fn next(&mut self) -> Result<u8, Error> {
			
 
				+        self.input.first()
			
 
				+            .map(|&c| { self.consume(1); c })
			
 
				+            .ok_or(Error::EOF)
			
 
				+    }
			
 
				+    fn expect(&mut self, c: u8) -> Result<(), Error> {
			
 
				+        if self.input.first() != Some(&c) {
			
 
				+            return Err(Error::Expected(c, self.input.first().cloned()));
			
 
				+        }
			
 
				+        self.consume(1);
			
 
				+        Ok(())
			
 
				+    }
			
 
				+    fn compile_inner(&mut self, toplevel: bool) -> Result<Vec<(Token, Range)>, Error> {
			
 
				+        let mut search: Vec<(Token, Range)> = Vec::new();
			
 
				+
			
 
				+        while let Some(&c) = self.input.first() {
			
 
				+            self.consume(1);
			
 
				+            let token = match c {
			
 
				+                b'^' => Token::Start,
			
 
				+                b'$' => Token::End,
			
 
				+                b'.' => Token::Any,
			
 
				+                b'*' => if let Some(last) = search.last_mut() {
			
 
				+                    last.1 = Range(0, None);
			
 
				+                    continue;
			
 
				+                } else {
			
 
				+                    return Err(Error::LeadingRepetition);
			
 
				+                },
			
 
				+                b'[' => {
			
 
				+                    let mut list = Vec::new();
			
 
				+                    let invert = self.input.first() == Some(&b'^');
			
 
				+
			
 
				+                    if invert {
			
 
				+                        self.consume(1);
			
 
				+                    }
			
 
				+
			
 
				+                    loop {
			
 
				+                        let mut c = self.next()?;
			
 
				+
			
 
				+                        let mut push = true;
			
 
				+
			
 
				+                        if c == b'[' {
			
 
				+                            // TODO: Handle collation characters properly,
			
 
				+                            // because currently idk what they are and only
			
 
				+                            // have the behavior of `grep` to go on.
			
 
				+                            match self.next()? {
			
 
				+                                b'.' => {
			
 
				+                                    c = self.next()?;
			
 
				+                                    self.expect(b'.')?;
			
 
				+                                    self.expect(b']')?;
			
 
				+                                },
			
 
				+                                b'=' => {
			
 
				+                                    c = self.next()?;
			
 
				+                                    self.expect(b'=')?;
			
 
				+                                    self.expect(b']')?;
			
 
				+                                },
			
 
				+                                b':' => {
			
 
				+                                    let end = self.input.iter().position(|&c| c == b':').ok_or(Error::EOF)?;
			
 
				+                                    let key = &self.input[..end];
			
 
				+                                    let class = *self.classes.get(key).ok_or_else(|| Error::UnknownClass(key.to_vec()))?;
			
 
				+                                    self.consume(end + 1);
			
 
				+                                    self.expect(b']')?;
			
 
				+
			
 
				+                                    list.push(Collation::Class(class));
			
 
				+                                    push = false;
			
 
				+                                },
			
 
				+                                _ => return Err(Error::UnknownCollation)
			
 
				+                            }
			
 
				+                        }
			
 
				+
			
 
				+                        if push {
			
 
				+                            list.push(Collation::Char(c));
			
 
				+
			
 
				+                            if self.input.first() == Some(&b'-') && self.input.get(1) != Some(&b']') {
			
 
				+                                self.consume(1);
			
 
				+                                let dest = self.next()?;
			
 
				+                                for c in (c+1)..=dest {
			
 
				+                                    list.push(Collation::Char(c));
			
 
				+                                }
			
 
				+                            }
			
 
				+                        }
			
 
				+
			
 
				+                        if self.input.first() == Some(&b']') {
			
 
				+                            self.consume(1);
			
 
				+                            break;
			
 
				+                        }
			
 
				+                    }
			
 
				+
			
 
				+                    Token::OneOf {
			
 
				+                        invert,
			
 
				+                        list
			
 
				+                    }
			
 
				+                },
			
 
				+                b'\\' => match self.input.first() {
			
 
				+                    None => return Err(Error::EOF),
			
 
				+                    Some(b'|') | Some(b')') if !toplevel => return Ok(search),
			
 
				+                    Some(&c @ b'|') | Some(&c @ b')') if toplevel => return Err(Error::UnexpectedToken(c)),
			
 
				+                    Some(&c) => {
			
 
				+                        self.consume(1);
			
 
				+                        match c {
			
 
				+                            b'(' => {
			
 
				+                                let mut branches = Vec::new();
			
 
				+                                loop {
			
 
				+                                    let inner = self.compile_inner(false)?;
			
 
				+                                    branches.push(inner);
			
 
				+                                    match self.next()? {
			
 
				+                                        b'|' => (),
			
 
				+                                        b')' => break,
			
 
				+                                        _ => unreachable!()
			
 
				+                                    }
			
 
				+                                }
			
 
				+                                Token::Group(branches)
			
 
				+                            },
			
 
				+                            b'<' => Token::WordStart,
			
 
				+                            b'>' => Token::WordEnd,
			
 
				+                            b'?' | b'+' => if let Some(last) = search.last_mut() {
			
 
				+                                last.1 = match c {
			
 
				+                                    b'?' => Range(0, Some(1)),
			
 
				+                                    b'+' => Range(1, None),
			
 
				+                                    _ => unreachable!()
			
 
				+                                };
			
 
				+                                continue;
			
 
				+                            } else {
			
 
				+                                return Err(Error::LeadingRepetition);
			
 
				+                            },
			
 
				+                            b'{' => if let Some(last) = search.last_mut() {
			
 
				+                                let first = self.take_int()?.ok_or(Error::EmptyRepetition)?;
			
 
				+                                let mut second = Some(first);
			
 
				+                                if let Some(b',') = self.input.first() {
			
 
				+                                    self.consume(1);
			
 
				+                                    second = self.take_int()?;
			
 
				+                                }
			
 
				+                                if self.input.first() == Some(&b'}') {
			
 
				+                                    self.consume(1);
			
 
				+                                } else if self.input.starts_with(br"\}") {
			
 
				+                                    self.consume(2);
			
 
				+                                } else {
			
 
				+                                    return Err(Error::UnclosedRepetition);
			
 
				+                                }
			
 
				+                                if second.map(|second| first > second).unwrap_or(false) {
			
 
				+                                    return Err(Error::IllegalRange);
			
 
				+                                }
			
 
				+                                last.1 = Range(first, second);
			
 
				+                                continue;
			
 
				+                            } else {
			
 
				+                                return Err(Error::LeadingRepetition);
			
 
				+                            },
			
 
				+                            c => Token::Char(c)
			
 
				+                        }
			
 
				+                    }
			
 
				+                },
			
 
				+                c => Token::Char(c)
			
 
				+            };
			
 
				+            search.push((token, Range(1, Some(1))));
			
 
				+        }
			
 
				+
			
 
				+        Ok(search)
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				#[cfg(test)]
				mod tests {
				    use super::*;

				    /// Compiles `input` with the default classes and returns the token list
				    fn compile(input: &[u8]) -> Vec<(Token, Range)> {
				        let regex = PosixRegexBuilder::new(input)
				            .with_default_classes()
				            .compile()
				            .expect("error compiling regex");
				        regex.search
				    }
				    /// A token that occurs exactly once
				    fn t(t: Token) -> (Token, Range) {
				        (t, Range(1, Some(1)))
				    }
				    /// A literal byte that occurs exactly once
				    fn c(c: u8) -> (Token, Range) {
				        t(Token::Char(c))
				    }
				    /// A sequence of literal bytes, each occurring exactly once
				    fn lit(bytes: &[u8]) -> Vec<(Token, Range)> {
				        bytes.iter().map(|&byte| c(byte)).collect()
				    }
				    /// The literal `e` with the given repetition bounds
				    fn e(min: u32, max: Option<u32>) -> (Token, Range) {
				        (Token::Char(b'e'), Range(min, max))
				    }
				    /// A bracket expression made only of literal bytes, occurring once
				    fn one_of(invert: bool, bytes: &[u8]) -> (Token, Range) {
				        t(Token::OneOf {
				            invert,
				            list: bytes.iter().map(|&byte| Collation::Char(byte)).collect()
				        })
				    }

				    #[test]
				    fn basic() {
				        assert_eq!(compile(b"abc"), lit(b"abc"));
				    }
				    #[test]
				    fn groups() {
				        assert_eq!(
				            compile(br"\(abc\|bcd\|cde\)"),
				            vec![t(Token::Group(vec![lit(b"abc"), lit(b"bcd"), lit(b"cde")]))]
				        );
				        assert_eq!(
				            compile(br"\(abc\|\(bcd\|cde\)\)"),
				            vec![t(Token::Group(vec![
				                lit(b"abc"),
				                vec![t(Token::Group(vec![lit(b"bcd"), lit(b"cde")]))]
				            ]))]
				        );
				    }
				    #[test]
				    fn words() {
				        let mut expected = vec![t(Token::WordStart)];
				        expected.extend(lit(b"word"));
				        expected.push(t(Token::WordEnd));
				        assert_eq!(compile(br"\<word\>"), expected);
				    }
				    #[test]
				    fn repetitions() {
				        assert_eq!(compile(br"yeee*"), vec![c(b'y'), c(b'e'), c(b'e'), e(0, None)]);
				        assert_eq!(compile(br"yee\?"), vec![c(b'y'), c(b'e'), e(0, Some(1))]);
				        assert_eq!(compile(br"yee\+"), vec![c(b'y'), c(b'e'), e(1, None)]);
				        assert_eq!(compile(br"ye\{2}"), vec![c(b'y'), e(2, Some(2))]);
				        assert_eq!(compile(br"ye\{2,}"), vec![c(b'y'), e(2, None)]);
				        assert_eq!(compile(br"ye\{2,3}"), vec![c(b'y'), e(2, Some(3))]);
				    }
				    #[test]
				    fn bracket() {
				        assert_eq!(compile(b"[abc]"), vec![one_of(false, b"abc")]);
				        assert_eq!(compile(b"[^abc]"), vec![one_of(true, b"abc")]);
				        assert_eq!(
				            compile(b"[]] [^]]"),
				            vec![one_of(false, b"]"), c(b' '), one_of(true, b"]")]
				        );
				        assert_eq!(
				            compile(b"[0-3] [a-c] [-1] [1-]"),
				            vec![
				                one_of(false, b"0123"),
				                c(b' '),
				                one_of(false, b"abc"),
				                c(b' '),
				                one_of(false, b"-1"),
				                c(b' '),
				                one_of(false, b"1-")
				            ]
				        );
				        assert_eq!(compile(b"[[.-.]-/]"), vec![one_of(false, b"-./")]);
				        assert_eq!(
				            compile(b"[[:digit:][:upper:]]"),
				            vec![t(Token::OneOf {
				                invert: false,
				                list: vec![
				                    Collation::Class(ctype::is_digit),
				                    Collation::Class(ctype::is_upper)
				                ]
				            })]
				        );
				    }
				}
			
--- a/src/ctype.rs
+++ b/src/ctype.rs
@@ -0,0 +1,36 @@
 
				/// Returns `true` for ASCII letters and ASCII decimal digits.
				pub fn is_alnum(c: u8) -> bool {
				    c.is_ascii_alphanumeric()
				}
			
 
				/// Returns `true` for ASCII letters of either case.
				pub fn is_alpha(c: u8) -> bool {
				    c.is_ascii_alphabetic()
				}
			
 
				/// Returns `true` for a space or a horizontal tab.
				pub fn is_blank(c: u8) -> bool {
				    match c {
				        b' ' | b'\t' => true,
				        _ => false
				    }
				}
			
 
				/// Returns `true` for the bytes 0x00 through 0x1f and for 0x7f.
				pub fn is_cntrl(c: u8) -> bool {
				    match c {
				        0x00..=0x1f | 0x7f => true,
				        _ => false
				    }
				}
			
 
				/// Returns `true` for the ASCII decimal digits `0` through `9`.
				pub fn is_digit(c: u8) -> bool {
				    c.is_ascii_digit()
				}
			
 
				/// Returns `true` for the bytes 0x21 through 0x7e.
				pub fn is_graph(c: u8) -> bool {
				    c.is_ascii_graphic()
				}
			
 
				/// Returns `true` for the ASCII lowercase letters `a` through `z`.
				pub fn is_lower(c: u8) -> bool {
				    c.is_ascii_lowercase()
				}
			
 
				/// Returns `true` for the bytes 0x20 (space) through 0x7e.
				pub fn is_print(c: u8) -> bool {
				    match c {
				        0x20..=0x7e => true,
				        _ => false
				    }
				}
			
 
				/// Returns `true` for bytes in 0x21..=0x7e that are neither ASCII letters
				/// nor ASCII digits.
				pub fn is_punct(c: u8) -> bool {
				    c.is_ascii_punctuation()
				}
			
 
				/// Returns `true` for a space and for the bytes 0x09 through 0x0d.
				pub fn is_space(c: u8) -> bool {
				    match c {
				        b' ' | 0x9..=0xD => true,
				        _ => false
				    }
				}
			
 
				/// Returns `true` for the ASCII uppercase letters `A` through `Z`.
				pub fn is_upper(c: u8) -> bool {
				    c.is_ascii_uppercase()
				}
			
 
				/// Returns `true` for ASCII hexadecimal digits: `0`-`9`, `a`-`f` and `A`-`F`.
				pub fn is_xdigit(c: u8) -> bool {
				    c.is_ascii_hexdigit()
				}
			
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -0,0 +1,24 @@
 
				+#![cfg_attr(feature = "no_std", no_std)]
			
 
				+#![cfg_attr(feature = "no_std", feature(alloc))]
			
 
				+
			
 
				// Stand-in for `std` when the `no_std` feature is enabled: it re-exports the
				// contents of `core` and `alloc` so that `std::` paths used elsewhere in the
				// crate resolve against this module.
				#[cfg(feature = "no_std")]
				mod std {
				    extern crate alloc;

				    pub use core::*;
				    pub use alloc::*;

				    // The allocating types that the regular prelude would otherwise provide.
				    pub mod prelude {
				        pub use super::alloc::{string::String, vec::Vec};
				    }
				}
			
 
				+#[cfg(feature = "no_std")]
			
 
				+use std::prelude::*;
			
 
				+
			
 
				+pub mod compile;
			
 
				+pub mod ctype;
			
 
				+pub mod matcher;
			
 
				+
			
 
				+pub use compile::PosixRegexBuilder;
			
 
				+pub use matcher::PosixRegex;
			
--- a/src/matcher.rs
+++ b/src/matcher.rs
@@ -0,0 +1,200 @@
 
				+//! The matcher: Can find substrings in a string that match any compiled regex
			
 
				+
			
 
				+use compile::{Token, Range};
			
 
				+
			
 
				+/// A regex matcher, ready to match stuff
			
 
				+pub struct PosixRegex {
			
 
				+    pub(crate) search: Vec<(Token, Range)>
			
 
				+}
			
 
				+impl PosixRegex {
			
 
				+    /// Match the string starting at the current position. This does not find
			
 
				+    /// substrings.
			
 
				+    pub fn matches_exact(&self, input: &[u8]) -> Option<PosixRegexResult> {
			
 
				+        // let mut groups = Vec::new();
			
 
				+        let mut matcher = PosixRegexMatcher {
			
 
				+            input,
			
 
				+            state: PosixRegexMatcherState {
			
 
				+                offset: 0
			
 
				+            },
			
 
				+            // groups: &mut groups
			
 
				+        };
			
 
				+        let start = matcher.state.offset;
			
 
				+        if !matcher.matches_exact(&self.search) {
			
 
				+            return None;
			
 
				+        }
			
 
				+        let end = matcher.state.offset;
			
 
				+
			
 
				+        Some(PosixRegexResult {
			
 
				+            start,
			
 
				+            end
			
 
				+        })
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+// This is a struct because it might need to keep more stuff later.
			
 
				+// TODO: Maybe remove this.
			
 
				+#[derive(Clone, Copy)]
			
 
				+struct PosixRegexMatcherState {
			
 
				+    offset: usize
			
 
				+}
			
 
				+struct PosixRegexMatcher<'a> {
			
 
				+    input: &'a [u8],
			
 
				+    state: PosixRegexMatcherState,
			
 
				+    // TODO: groups: &'a mut Vec<(usize, usize)>
			
 
				+}
			
 
				+impl<'a> PosixRegexMatcher<'a> {
			
 
				+    fn next(&mut self) -> Option<u8> {
			
 
				+        self.input.get(self.state.offset)
			
 
				+            .map(|&c| { self.state.offset += 1; c })
			
 
				+    }
			
 
				+    fn peek(&self) -> Option<u8> {
			
 
				+        self.input.get(self.state.offset).cloned()
			
 
				+    }
			
 
				+    fn match_token(&mut self, token: &Token) -> bool {
			
 
				+        //println!("Matching {:?} with {:?}", token, &self.input[self.state.offset..]);
			
 
				+        match *token {
			
 
				+            Token::Any => self.next().is_some(),
			
 
				+            Token::Char(c) => self.peek() == Some(c) && self.next().is_some(),
			
 
				+            Token::End => self.next().is_none(),
			
 
				+            Token::Group(_) => unimplemented!("TODO: Groups"),
			
 
				+            Token::OneOf { invert, ref list } => if let Some(c) = self.next() {
			
 
				+                list.iter().any(|collation| collation.matches(c)) == !invert
			
 
				+            } else {
			
 
				+                false
			
 
				+            },
			
 
				+            Token::Start => self.state.offset == 0,
			
 
				+            Token::WordEnd |
			
 
				+            Token::WordStart => unimplemented!("TODO: Word boundaries")
			
 
				+        }
			
 
				+    }
			
 
				+    fn matches_exact(&mut self, mut tokens: &[(Token, Range)]) -> bool {
			
 
				+        loop {
			
 
				+            //println!("Matching {:?} and {:?}", tokens, &self.input[self.state.offset..]);
			
 
				+
			
 
				+            if tokens.is_empty() {
			
 
				+                return true;
			
 
				+            }
			
 
				+
			
 
				+            let (ref token, Range(start, end)) = *tokens.first().unwrap();
			
 
				+            tokens = &tokens[1..];
			
 
				+
			
 
				+            let mut repetition_branches = Vec::new();
			
 
				+
			
 
				+            // Make sure it matches at least <start> times:
			
 
				+            for _ in 1..=start {
			
 
				+                //println!("Must match: {:?}", token);
			
 
				+                if !self.match_token(token) {
			
 
				+                    return false;
			
 
				+                }
			
 
				+            }
			
 
				+
			
 
				+            //println!("Matches enough times, at least");
			
 
				+
			
 
				+            // Try all times, greedily (so in reverse order):
			
 
				+            let mut max = end.map(|end| end - start);
			
 
				+
			
 
				+            let original = self.state;
			
 
				+
			
 
				+            while max.map(|max| max > 0).unwrap_or(true) && self.match_token(token) {
			
 
				+                //println!("Repetitions left: {:?}", max);
			
 
				+                repetition_branches.push(self.state);
			
 
				+                max = max.map(|max| max - 1);
			
 
				+            }
			
 
				+
			
 
				+            for branch in repetition_branches.into_iter().rev() {
			
 
				+                self.state = branch;
			
 
				+                //println!("- Branch: {:?}", &self.input[self.state.offset..]);
			
 
				+                if self.matches_exact(tokens) {
			
 
				+                    return true;
			
 
				+                }
			
 
				+            }
			
 
				+
			
 
				+            self.state = original;
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				/// The bounds of a single match, as offsets into the matched input
				#[derive(Clone, Debug, Eq, PartialEq)]
				pub struct PosixRegexResult {
				    /// Offset of the first byte of the match (inclusive)
				    pub start: usize,
				    /// Offset just past the last byte of the match (exclusive)
				    pub end: usize
				}
			
 
				+
			
 
				#[cfg(test)]
				mod tests {
				    use super::*;
				    use ::PosixRegexBuilder;

				    /// Compiles `regex` with the default classes and matches it against `input`
				    fn matches_exact(regex: &str, input: &str) -> Option<PosixRegexResult> {
				        let compiled = PosixRegexBuilder::new(regex.as_bytes())
				            .with_default_classes()
				            .compile()
				            .expect("error compiling regex");
				        compiled.matches_exact(input.as_bytes())
				    }
				    /// Asserts that `regex` matches every input in `accepted` and none in `rejected`
				    fn check(regex: &str, accepted: &[&str], rejected: &[&str]) {
				        for input in accepted {
				            assert!(matches_exact(regex, input).is_some());
				        }
				        for input in rejected {
				            assert!(matches_exact(regex, input).is_none());
				        }
				    }

				    #[test]
				    fn basic() {
				        check("abc", &["abc"], &["bbc", "acc", "abd"]);
				    }
				    #[test]
				    fn repetitions() {
				        check("abc*", &["ab", "abc", "abccc"], &[]);
				        check(r"a\{1,2\}b", &["ab", "aab"], &["b", "aaab"]);
				    }
				    #[test]
				    fn any() {
				        check(".*", &[""], &[]);
				        check(".*b", &["b", "ab", "aaaaab", "HELLO WORLDb"], &["HELLO WORLD"]);
				        check("H.*O WORLD", &["HELLO WORLD"], &[]);
				    }
				    #[test]
				    fn brackets() {
				        check("[abc]*d", &["abcd"], &[]);
				        check("[0-9]*d", &["1234d"], &[]);
				        check("[[:digit:]]*d", &["1234d"], &["abcd"]);
				    }
				    #[test]
				    fn offsets() {
				        assert_eq!(
				            matches_exact("abc", "abcd"),
				            Some(PosixRegexResult { start: 0, end: 3 })
				        );
				        assert_eq!(
				            matches_exact(r"[[:alpha:]]\+", "abcde12345"),
				            Some(PosixRegexResult { start: 0, end: 5 })
				        );
				    }
				    #[test]
				    fn start_and_end() {
				        check("^abc$", &["abc"], &[]);
				        check("abc$", &[], &["abcd"]);
				        check("^bcd", &[], &["abcd"]);
				    }
				    // Disabled: matching `Token::Group` is not implemented yet.
				    //#[test]
				    //fn groups() {
				    //    assert!(matches_exact(r"\(a*\|b\|c\)d", "d").is_some());
				    //    assert!(matches_exact(r"\(a*\|b\|c\)d", "aaaad").is_some());
				    //    assert!(matches_exact(r"\(a*\|b\|c\)d", "bd").is_some());
				    //    assert!(matches_exact(r"\(a*\|b\|c\)d", "bbbbbd").is_none());
				    //}
				    //#[test]
				    //fn repeating_groups() {
				    //    assert!(matches_exact(r"\(a\|b\|c\)*d", "d").is_some());
				    //    assert!(matches_exact(r"\(a\|b\|c\)*d", "aaaad").is_some());
				    //    assert!(matches_exact(r"\(a\|b\|c\)*d", "bbbbd").is_some());
				    //    assert!(matches_exact(r"\(a\|b\|c\)*d", "aabbd").is_some());
				    //}
				}