Explorar el Código

Merge branch 'housekeeping' into 'master'

Spring cleanup

See merge request redox-os/posix-regex!1
jD91mZM2 hace 5 años
padre
commit
df45954609
Se han modificado 5 ficheros con 333 adiciones y 215 borrados
  1. 1 1
      rust-toolchain
  2. 88 50
      src/compile.rs
  3. 13 13
      src/immut_vec.rs
  4. 185 111
      src/matcher.rs
  5. 46 40
      src/tree.rs

+ 1 - 1
rust-toolchain

@@ -1 +1 @@
-nightly-2018-11-07
+nightly-2019-05-10

+ 88 - 50
src/compile.rs

@@ -7,8 +7,8 @@ use std::prelude::*;
 use std::borrow::Cow;
 use std::collections::HashMap;
 use std::fmt;
-use {ctype, PosixRegex};
 use tree::*;
+use {ctype, PosixRegex};
 
 /// Repetition bounds, for example + is (1, None), and ? is (0, Some(1))
 #[derive(Clone, Copy, PartialEq, Eq)]
@@ -26,19 +26,21 @@ impl fmt::Debug for Range {
 #[derive(Clone, PartialEq, Eq)]
 pub enum Collation {
     Char(u8),
-    Class(fn(u8) -> bool)
+    Class(fn(u8) -> bool),
 }
 impl Collation {
     /// Compare this collation to a character
     pub fn matches(&self, other: u8, insensitive: bool) -> bool {
         match *self {
-            Collation::Char(me) if insensitive => if ctype::is_alpha(me) && ctype::is_alpha(other) {
-                me | 32 == other | 32
-            } else {
-                me == other
-            },
+            Collation::Char(me) if insensitive => {
+                if ctype::is_alpha(me) && ctype::is_alpha(other) {
+                    me | 32 == other | 32
+                } else {
+                    me == other
+                }
+            }
             Collation::Char(me) => me == other,
-            Collation::Class(f) => f(other)
+            Collation::Class(f) => f(other),
         }
     }
 }
@@ -65,12 +67,12 @@ pub enum Token {
     Group(usize),
     OneOf {
         invert: bool,
-        list: Vec<Collation>
+        list: Vec<Collation>,
     },
     Root,
     Start,
     WordEnd,
-    WordStart
+    WordStart,
 }
 impl fmt::Debug for Token {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
@@ -87,7 +89,7 @@ impl fmt::Debug for Token {
             Token::Root => write!(f, "Root"),
             Token::Start => write!(f, "^"),
             Token::WordEnd => write!(f, ">"),
-            Token::WordStart => write!(f, "<")
+            Token::WordStart => write!(f, "<"),
         }
     }
 }
@@ -104,7 +106,7 @@ pub enum Error {
     UnclosedRepetition,
     UnexpectedToken(u8),
     UnknownClass(Vec<u8>),
-    UnknownCollation
+    UnknownCollation,
 }
 
 /// A regex builder struct
@@ -112,7 +114,7 @@ pub struct PosixRegexBuilder<'a> {
     input: &'a [u8],
     classes: HashMap<&'a [u8], fn(u8) -> bool>,
     group_id: usize,
-    builder: TreeBuilder
+    builder: TreeBuilder,
 }
 impl<'a> PosixRegexBuilder<'a> {
     /// Create a new instance that is ready to parse the regex `input`
@@ -121,7 +123,7 @@ impl<'a> PosixRegexBuilder<'a> {
             input,
             classes: HashMap::new(),
             group_id: 1,
-            builder: TreeBuilder::default()
+            builder: TreeBuilder::default(),
         }
     }
     /// Add a custom collation class, for use within square brackets (such as `[[:digit:]]`)
@@ -168,16 +170,22 @@ impl<'a> PosixRegexBuilder<'a> {
         let mut out: Option<u32> = None;
         while let Some(&c @ b'0'..=b'9') = self.input.first() {
             self.consume(1);
-            out = Some(out.unwrap_or(0)
-                .checked_mul(10)
-                .and_then(|out| out.checked_add((c - b'0') as u32))
-                .ok_or(Error::IntegerOverflow)?);
+            out = Some(
+                out.unwrap_or(0)
+                    .checked_mul(10)
+                    .and_then(|out| out.checked_add((c - b'0') as u32))
+                    .ok_or(Error::IntegerOverflow)?,
+            );
         }
         Ok(out)
     }
     fn next(&mut self) -> Result<u8, Error> {
-        self.input.first()
-            .map(|&c| { self.consume(1); c })
+        self.input
+            .first()
+            .map(|&c| {
+                self.consume(1);
+                c
+            })
             .ok_or(Error::EOF)
     }
     fn expect(&mut self, c: u8) -> Result<(), Error> {
@@ -215,10 +223,10 @@ impl<'a> PosixRegexBuilder<'a> {
                         }
                         range = Range(first, second);
                         None
-                    },
-                    _ => None
+                    }
+                    _ => None,
                 },
-                _ => None
+                _ => None,
             };
             if let Some((consume, new)) = new {
                 range = new;
@@ -228,7 +236,8 @@ impl<'a> PosixRegexBuilder<'a> {
         Ok(range)
     }
     fn parse(&mut self) -> Result<(), Error> {
-        self.builder.start_internal(Token::Alternative, Range(1, Some(1)));
+        self.builder
+            .start_internal(Token::Alternative, Range(1, Some(1)));
         while let Ok(c) = self.next() {
             let token = match c {
                 b'^' => Token::Start,
@@ -256,33 +265,41 @@ impl<'a> PosixRegexBuilder<'a> {
                                     c = self.next()?;
                                     self.expect(b'.')?;
                                     self.expect(b']')?;
-                                },
+                                }
                                 b'=' => {
                                     c = self.next()?;
                                     self.expect(b'=')?;
                                     self.expect(b']')?;
-                                },
+                                }
                                 b':' => {
-                                    let end = self.input.iter().position(|&c| c == b':').ok_or(Error::EOF)?;
+                                    let end = self
+                                        .input
+                                        .iter()
+                                        .position(|&c| c == b':')
+                                        .ok_or(Error::EOF)?;
                                     let key = &self.input[..end];
-                                    let class = *self.classes.get(key).ok_or_else(|| Error::UnknownClass(key.to_vec()))?;
+                                    let class = *self
+                                        .classes
+                                        .get(key)
+                                        .ok_or_else(|| Error::UnknownClass(key.to_vec()))?;
                                     self.consume(end + 1);
                                     self.expect(b']')?;
 
                                     list.push(Collation::Class(class));
                                     push = false;
-                                },
-                                _ => return Err(Error::UnknownCollation)
+                                }
+                                _ => return Err(Error::UnknownCollation),
                             }
                         }
 
                         if push {
                             list.push(Collation::Char(c));
 
-                            if self.input.first() == Some(&b'-') && self.input.get(1) != Some(&b']') {
+                            if self.input.first() == Some(&b'-') && self.input.get(1) != Some(&b']')
+                            {
                                 self.consume(1);
                                 let dest = self.next()?;
-                                for c in (c+1)..=dest {
+                                for c in (c + 1)..=dest {
                                     list.push(Collation::Char(c));
                                 }
                             }
@@ -294,12 +311,15 @@ impl<'a> PosixRegexBuilder<'a> {
                         }
                     }
 
-                    Token::OneOf {
-                        invert,
-                        list
-                    }
-                },
-                b'\\' if self.input.first().map(|&c| (c as char).is_digit(10)).unwrap_or(false) => {
+                    Token::OneOf { invert, list }
+                }
+                b'\\'
+                    if self
+                        .input
+                        .first()
+                        .map(|&c| (c as char).is_digit(10))
+                        .unwrap_or(false) =>
+                {
                     let id = self.take_int()?.unwrap();
                     if (id as usize) >= self.group_id {
                         return Err(Error::InvalidBackRef(id));
@@ -313,28 +333,42 @@ impl<'a> PosixRegexBuilder<'a> {
                         let checkpoint = self.builder.checkpoint();
                         self.parse()?;
                         let range = self.parse_range()?;
-                        self.builder.start_internal_at(checkpoint, Token::Group(id), range);
+                        self.builder
+                            .start_internal_at(checkpoint, Token::Group(id), range);
                         self.builder.finish_internal();
                         continue;
-                    },
+                    }
                     b')' => break,
                     b'|' => {
                         self.builder.finish_internal();
-                        self.builder.start_internal(Token::Alternative, Range(1, Some(1)));
+                        self.builder
+                            .start_internal(Token::Alternative, Range(1, Some(1)));
                         continue;
-                    },
+                    }
                     b'<' => Token::WordStart,
                     b'>' => Token::WordEnd,
-                    b'a' => Token::OneOf { invert: false, list: vec![Collation::Class(ctype::is_alnum)] },
-                    b'd' => Token::OneOf { invert: false, list: vec![Collation::Class(ctype::is_digit)] },
-                    b's' => Token::OneOf { invert: false, list: vec![Collation::Class(ctype::is_space)] },
-                    b'S' => Token::OneOf { invert: true,  list: vec![Collation::Class(ctype::is_space)] },
+                    b'a' => Token::OneOf {
+                        invert: false,
+                        list: vec![Collation::Class(ctype::is_alnum)],
+                    },
+                    b'd' => Token::OneOf {
+                        invert: false,
+                        list: vec![Collation::Class(ctype::is_digit)],
+                    },
+                    b's' => Token::OneOf {
+                        invert: false,
+                        list: vec![Collation::Class(ctype::is_space)],
+                    },
+                    b'S' => Token::OneOf {
+                        invert: true,
+                        list: vec![Collation::Class(ctype::is_space)],
+                    },
                     b'n' => Token::Char(b'\n'),
                     b'r' => Token::Char(b'\r'),
                     b't' => Token::Char(b'\t'),
-                    c => Token::Char(c)
+                    c => Token::Char(c),
                 },
-                c => Token::Char(c)
+                c => Token::Char(c),
             };
             let range = self.parse_range()?;
             self.builder.leaf(token, range);
@@ -545,11 +579,15 @@ Root 1..1
         );
         assert_eq!(
             compile(b"[[:digit:][:upper:]]"),
-            format!("\
+            format!(
+                "\
 Root 1..1
   Alternative 1..1
     {{invert: false, [{:p}, {:p}]}} 1..1
-", ctype::is_digit as fn(u8) -> bool, ctype::is_upper as fn(u8) -> bool)
+",
+                ctype::is_digit as fn(u8) -> bool,
+                ctype::is_upper as fn(u8) -> bool
+            )
         );
     }
     #[test]

+ 13 - 13
src/immut_vec.rs

@@ -5,27 +5,24 @@ use std::cell::RefCell;
 
 pub struct ImmutVecItem<T> {
     prev: Option<usize>,
-    data: T
+    data: T,
 }
 pub struct ImmutVec<'a, T> {
     inner: &'a RefCell<Vec<ImmutVecItem<T>>>,
-    id: Option<usize>
+    id: Option<usize>,
 }
 impl<'a, T> Copy for ImmutVec<'a, T> {}
 impl<'a, T> Clone for ImmutVec<'a, T> {
     fn clone(&self) -> Self {
         Self {
             inner: self.inner,
-            id: self.id
+            id: self.id,
         }
     }
 }
 impl<'a, T> ImmutVec<'a, T> {
     pub fn new(inner: &'a RefCell<Vec<ImmutVecItem<T>>>) -> Self {
-        Self {
-            inner,
-            id: None
-        }
+        Self { inner, id: None }
     }
     #[must_use = "push does nothing to the original vector"]
     pub fn push(self, item: T) -> Self {
@@ -33,7 +30,7 @@ impl<'a, T> ImmutVec<'a, T> {
         let id = inner.len();
         inner.push(ImmutVecItem {
             prev: self.id,
-            data: item
+            data: item,
         });
         Self {
             id: Some(id),
@@ -47,13 +44,16 @@ impl<'a, T: Clone> ImmutVec<'a, T> {
         let inner = self.inner.borrow();
         let id = match self.id {
             None => return (self, None),
-            Some(id) => id
+            Some(id) => id,
         };
         let item = &inner[id];
-        (Self {
-            id: item.prev,
-            ..self
-        }, Some(item.data.clone()))
+        (
+            Self {
+                id: item.prev,
+                ..self
+            },
+            Some(item.data.clone()),
+        )
     }
     pub fn iter_rev(self) -> ImmutVecIter<'a, T> {
         ImmutVecIter(self)

+ 185 - 111
src/matcher.rs

@@ -4,15 +4,15 @@
 use std::prelude::*;
 
 use std::borrow::Cow;
+use std::cell::RefCell;
 use std::collections::HashSet;
 use std::fmt;
-use std::cell::RefCell;
 use std::rc::Rc;
 
-use compile::{Token, Range};
+use compile::{Range, Token};
 use ctype;
 use immut_vec::ImmutVec;
-use tree::{*, Node as TreeNode};
+use tree::{Node as TreeNode, *};
 
 /// A regex matcher, ready to match stuff
 #[derive(Clone)]
@@ -21,7 +21,7 @@ pub struct PosixRegex<'a> {
     case_insensitive: bool,
     newline: bool,
     no_start: bool,
-    no_end: bool
+    no_end: bool,
 }
 impl<'a> PosixRegex<'a> {
     /// Create a new matcher instance from the specified alternations. This
@@ -33,7 +33,7 @@ impl<'a> PosixRegex<'a> {
             case_insensitive: false,
             newline: false,
             no_start: false,
-            no_end: false
+            no_end: false,
         }
     }
     /// Chainable function to enable/disable case insensitivity. Default: false.
@@ -74,7 +74,10 @@ impl<'a> PosixRegex<'a> {
                 cursor = node.child;
             } else {
                 let mut node = Some(node);
-                while node.map(|node| node.next_sibling.is_none()).unwrap_or(false) {
+                while node
+                    .map(|node| node.next_sibling.is_none())
+                    .unwrap_or(false)
+                {
                     node = node.unwrap().parent.map(|node| &self.tree[node]);
                 }
                 cursor = node.and_then(|node| node.next_sibling);
@@ -94,12 +97,17 @@ impl<'a> PosixRegex<'a> {
             base: self,
             input,
             offset: 0,
-            max_groups: self.count_groups()
+            max_groups: self.count_groups(),
         };
         let internal_prev = RefCell::new(Vec::new());
         let prev = ImmutVec::new(&internal_prev);
-        let tree = self.tree[self.tree.root].children(&self.tree)
-            .filter_map(|node| self.tree[node].child.map(|child| Node::new(&self.tree, child, prev)))
+        let tree = self.tree[self.tree.root]
+            .children(&self.tree)
+            .filter_map(|node| {
+                self.tree[node]
+                    .child
+                    .map(|child| Node::new(&self.tree, child, prev))
+            })
             .collect();
 
         let start = matcher.offset;
@@ -113,12 +121,16 @@ impl<'a> PosixRegex<'a> {
         }
     }
     /// Match any substrings in the string, but optionally no more than `max`
-    pub fn matches(&self, input: &[u8], mut max: Option<usize>) -> Vec<Box<[Option<(usize, usize)>]>> {
+    pub fn matches(
+        &self,
+        input: &[u8],
+        mut max: Option<usize>,
+    ) -> Vec<Box<[Option<(usize, usize)>]>> {
         let mut matcher = PosixRegexMatcher {
             base: self,
             input,
             offset: 0,
-            max_groups: self.count_groups()
+            max_groups: self.count_groups(),
         };
 
         let mut arena = self.tree.arena.to_vec();
@@ -132,7 +144,7 @@ impl<'a> PosixRegex<'a> {
             range: Range(1, Some(1)),
             parent: None,
             next_sibling: None,
-            child: root
+            child: root,
         });
 
         // Update parents
@@ -150,12 +162,12 @@ impl<'a> PosixRegex<'a> {
             range: Range(0, None),
             parent: None,
             next_sibling: Some(group_id),
-            child: None
+            child: None,
         });
 
         let tree = Tree {
             arena: arena.into_boxed_slice(),
-            root: start_id
+            root: start_id,
         };
         let internal_prev = RefCell::new(Vec::new());
         let prev = ImmutVec::new(&internal_prev);
@@ -169,8 +181,8 @@ impl<'a> PosixRegex<'a> {
                         matcher.offset += 1;
                     }
                     matches.push(groups)
-                },
-                None => break
+                }
+                None => break,
             }
             max = max.map(|max| max - 1);
         }
@@ -182,13 +194,13 @@ impl<'a> PosixRegex<'a> {
 struct GroupEvent {
     open: bool,
     id: usize,
-    offset: usize
+    offset: usize,
 }
 #[derive(Clone, Copy)]
 struct BackRef {
     offset: usize,
     index: usize,
-    len: usize
+    len: usize,
 }
 
 #[derive(Clone)]
@@ -198,7 +210,7 @@ struct Node<'a> {
     node: NodeId,
     prev: ImmutVec<'a, GroupEvent>,
     repeated: u32,
-    backref: Option<BackRef>
+    backref: Option<BackRef>,
 }
 impl<'a> fmt::Debug for Node<'a> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
@@ -231,7 +243,7 @@ impl<'a> Node<'a> {
                 me.backref = Some(BackRef {
                     offset: start,
                     index: 0,
-                    len: end - start
+                    len: end - start,
                 });
                 if start == end {
                     // Empty group, mark as repeated enough times
@@ -245,19 +257,19 @@ impl<'a> Node<'a> {
     /// Create a new node. This is only called from the main function to start each alternative path
     fn new(tree: &'a Tree, node: NodeId, prev: ImmutVec<'a, GroupEvent>) -> Self {
         Self::prepare(Self {
-            tree: tree,
+            tree,
             parent: None,
             node,
             prev,
             repeated: 0,
-            backref: None
+            backref: None,
         })
     }
     /// Expand this group node into its children
     fn into_children(mut self, branches: &mut Vec<Node<'a>>, offset: usize) {
         let id = match self.tree[self.node].token {
             Token::Group(id) => id,
-            _ => return
+            _ => return,
         };
         self.repeated += 1;
         let mut parent = Rc::new(self);
@@ -275,18 +287,15 @@ impl<'a> Node<'a> {
                         offset,
                     }),
                     repeated: 0,
-                    backref: None
+                    backref: None,
                 }));
             }
         }
         if empty {
-            let mut parent = Rc::get_mut(&mut parent).expect("group empty but still there's a dangling reference");
+            let mut parent = Rc::get_mut(&mut parent)
+                .expect("group empty but still there's a dangling reference");
             for &open in &[true, false] {
-                parent.prev = parent.prev.push(GroupEvent {
-                    open,
-                    id,
-                    offset
-                });
+                parent.prev = parent.prev.push(GroupEvent { open, id, offset });
             }
             parent.add_branches(branches, offset);
         }
@@ -296,7 +305,11 @@ impl<'a> Node<'a> {
         &self.tree[self.node]
     }
     /// Get a list of all capturing groups
-    fn get_capturing_groups(&self, max_count: usize, offset: usize) -> Box<[Option<(usize, usize)>]> {
+    fn get_capturing_groups(
+        &self,
+        max_count: usize,
+        offset: usize,
+    ) -> Box<[Option<(usize, usize)>]> {
         let mut prev = self.prev;
 
         // Close all currently open groups
@@ -304,13 +317,12 @@ impl<'a> Node<'a> {
         while let Some(group) = parent {
             let group = &self.tree[group];
             parent = group.parent;
-            match group.token {
-                Token::Group(id) => prev = prev.push(GroupEvent {
+            if let Token::Group(id) = group.token {
+                prev = prev.push(GroupEvent {
                     open: false,
                     id,
-                    offset
-                }),
-                _ => ()
+                    offset,
+                })
             }
         }
 
@@ -324,7 +336,8 @@ impl<'a> Node<'a> {
                 group.1 = group.1.or(Some(event.offset));
             }
         }
-        groups.into_iter()
+        groups
+            .into_iter()
             .map(|(start, end)| Some((start?, end?)))
             .collect::<Vec<_>>()
             .into_boxed_slice()
@@ -345,7 +358,11 @@ impl<'a> Node<'a> {
     /// possibly repeat the parent
     fn add_branches(&self, branches: &mut Vec<Node<'a>>, offset: usize) {
         let Range(min, _) = self.node().range;
-        if self.backref.map(|backref| backref.index > 0 || self.repeated < min).unwrap_or(false) {
+        if self
+            .backref
+            .map(|backref| backref.index > 0 || self.repeated < min)
+            .unwrap_or(false)
+        {
             // Wait for back reference to complete
         } else if let Some(next) = self.node().next_sibling {
             branches.push(Self::prepare(Self {
@@ -355,7 +372,7 @@ impl<'a> Node<'a> {
         } else {
             let parent = match self.parent {
                 Some(ref parent) => parent,
-                None => return
+                None => return,
             };
             let Range(min, _) = parent.node().range;
 
@@ -374,17 +391,22 @@ impl<'a> Node<'a> {
             if parent.repeated >= min {
                 // Group is closing, migrate previous & current groups to next.
                 let mut parent = Some(parent);
-                while parent.map(|parent| parent.node().next_sibling.is_none()).unwrap_or(false) {
+                while parent
+                    .map(|parent| parent.node().next_sibling.is_none())
+                    .unwrap_or(false)
+                {
                     parent = parent.unwrap().parent.as_ref();
                 }
-                if let Some((node, next)) = parent.and_then(|parent| parent.node().next_sibling.map(|node| (parent, node))) {
+                if let Some((node, next)) =
+                    parent.and_then(|parent| parent.node().next_sibling.map(|node| (parent, node)))
+                {
                     let clone = (**node).clone();
                     let mut prev = self.prev;
                     for &id in &ids {
                         prev = prev.push(GroupEvent {
                             open: false,
                             id,
-                            offset
+                            offset,
                         });
                     }
                     branches.push(Self::prepare(Self {
@@ -407,7 +429,7 @@ impl<'a> Node<'a> {
                         prev = prev.push(GroupEvent {
                             open: false,
                             id,
-                            offset
+                            offset,
                         });
                     }
                     clone.prev = prev;
@@ -442,7 +464,8 @@ impl<'a> Node<'a> {
             }
             next = current.parent.as_ref().map(|node| &**node);
         }
-        next.and_then(|node| self.tree[node.node].next_sibling).is_none()
+        next.and_then(|node| self.tree[node.node].next_sibling)
+            .is_none()
     }
 }
 
@@ -450,10 +473,14 @@ struct PosixRegexMatcher<'a> {
     base: &'a PosixRegex<'a>,
     input: &'a [u8],
     offset: usize,
-    max_groups: usize
+    max_groups: usize,
 }
 impl<'a> PosixRegexMatcher<'a> {
-    fn expand<'b>(&mut self, skip: &mut HashSet<NodeId>, branches: &mut [Node<'b>]) -> Vec<Node<'b>> {
+    fn expand<'b>(
+        &mut self,
+        skip: &mut HashSet<NodeId>,
+        branches: &mut [Node<'b>],
+    ) -> Vec<Node<'b>> {
         let mut insert = Vec::new();
 
         for branch in &mut *branches {
@@ -488,7 +515,10 @@ impl<'a> PosixRegexMatcher<'a> {
         // Whether or not any branch, at any point, got fully explored. This
         // means at least one path of the regex successfully completed!
         let mut succeeded = None;
-        let mut prev = self.offset.checked_sub(1).and_then(|index| self.input.get(index).cloned());
+        let mut prev = self
+            .offset
+            .checked_sub(1)
+            .and_then(|index| self.input.get(index).cloned());
 
         let mut set = HashSet::new();
 
@@ -506,39 +536,41 @@ impl<'a> PosixRegexMatcher<'a> {
 
                 while index < branches.len() {
                     if remove > 0 {
-                        branches.swap(index, index-remove);
+                        branches.swap(index, index - remove);
                     }
-                    let branch = &mut branches[index-remove];
+                    let branch = &mut branches[index - remove];
                     index += 1;
 
                     let node = branch.node();
 
                     match node.token {
-                        Token::End |
-                        Token::Start |
-                        Token::WordEnd |
-                        Token::WordStart => {
+                        Token::End | Token::Start | Token::WordEnd | Token::WordStart => {
                             let accepts = match node.token {
-                                Token::End =>
+                                Token::End => {
                                     (!self.base.no_end && next.is_none())
-                                        || (self.base.newline && next == Some(b'\n')),
-                                Token::Start =>
+                                        || (self.base.newline && next == Some(b'\n'))
+                                }
+                                Token::Start => {
                                     (!self.base.no_start && self.offset == 0)
-                                        || (self.base.newline && prev == Some(b'\n')),
+                                        || (self.base.newline && prev == Some(b'\n'))
+                                }
                                 Token::WordEnd => next.map(ctype::is_word_boundary).unwrap_or(true),
-                                Token::WordStart => prev.map(ctype::is_word_boundary).unwrap_or(true),
-                                _ => unreachable!()
+                                Token::WordStart => {
+                                    prev.map(ctype::is_word_boundary).unwrap_or(true)
+                                }
+                                _ => unreachable!(),
                             };
                             if accepts {
                                 branch.increment();
                                 branch.add_branches(&mut insert, self.offset);
                             }
                             if branch.is_final() {
-                                succeeded = Some(branch.get_capturing_groups(self.max_groups, self.offset));
+                                succeeded =
+                                    Some(branch.get_capturing_groups(self.max_groups, self.offset));
                             }
                             remove += 1;
-                        },
-                        _ => ()
+                        }
+                        _ => (),
                     }
                 }
                 branches.truncate(branches.len() - remove);
@@ -560,40 +592,56 @@ impl<'a> PosixRegexMatcher<'a> {
                 if remove > 0 {
                     // Just like Rust's `retain` function, shift all elements I
                     // want to keep back and `truncate` when I'm done.
-                    branches.swap(index, index-remove);
+                    branches.swap(index, index - remove);
                 }
-                let branch = &mut branches[index-remove];
+                let branch = &mut branches[index - remove];
                 index += 1;
 
                 let node = branch.node();
                 let Range(_, max) = node.range;
 
                 // Step 3: Check if the token matches
-                let accepts = max.map(|max| branch.repeated < max).unwrap_or(true) && match node.token {
-                    Token::InternalStart => next.is_some(),
-                    Token::Group { .. } => false, // <- content is already expanded and handled
-
-                    Token::Any => next.map(|c| !self.base.newline || c != b'\n').unwrap_or(false),
-                    Token::BackRef(_) => if let Some(ref backref) = branch.backref {
-                        next == Some(self.input[backref.offset + backref.index])
-                    } else { false },
-                    Token::Char(c) => if self.base.case_insensitive {
-                        next.map(|c2| c & !32 == c2 & !32).unwrap_or(false)
-                    } else {
-                        next == Some(c)
-                    },
-                    Token::OneOf { invert, ref list } => if let Some(next) = next {
-                        (!invert || !self.base.newline || next != b'\n')
-                        && list.iter().any(|c| c.matches(next, self.base.case_insensitive)) == !invert
-                    } else { false },
-
-                    Token::Alternative
-                    | Token::End
-                    | Token::Root
-                    | Token::Start
-                    | Token::WordEnd
-                    | Token::WordStart => unreachable!()
-                };
+                let accepts = max.map(|max| branch.repeated < max).unwrap_or(true)
+                    && match node.token {
+                        Token::InternalStart => next.is_some(),
+                        Token::Group { .. } => false, // <- content is already expanded and handled
+
+                        Token::Any => next
+                            .map(|c| !self.base.newline || c != b'\n')
+                            .unwrap_or(false),
+                        Token::BackRef(_) => {
+                            if let Some(ref backref) = branch.backref {
+                                next == Some(self.input[backref.offset + backref.index])
+                            } else {
+                                false
+                            }
+                        }
+                        Token::Char(c) => {
+                            if self.base.case_insensitive {
+                                next.map(|c2| c & !32 == c2 & !32).unwrap_or(false)
+                            } else {
+                                next == Some(c)
+                            }
+                        }
+                        Token::OneOf { invert, ref list } => {
+                            if let Some(next) = next {
+                                (!invert || !self.base.newline || next != b'\n')
+                                    && list
+                                        .iter()
+                                        .any(|c| c.matches(next, self.base.case_insensitive))
+                                        == !invert
+                            } else {
+                                false
+                            }
+                        }
+
+                        Token::Alternative
+                        | Token::End
+                        | Token::Root
+                        | Token::Start
+                        | Token::WordEnd
+                        | Token::WordStart => unreachable!(),
+                    };
 
                 if accepts {
                     branch.increment();
@@ -623,7 +671,8 @@ impl<'a> PosixRegexMatcher<'a> {
 
             if branches.is_empty() ||
                     // The internal start thing is lazy, not greedy:
-                    (succeeded.is_some() && branches.iter().all(|t| t.node().token == Token::InternalStart)) {
+                    (succeeded.is_some() && branches.iter().all(|t| t.node().token == Token::InternalStart))
+            {
                 return succeeded;
             }
 
@@ -644,7 +693,7 @@ mod tests {
     use self::test::Bencher;
 
     use super::*;
-    use ::PosixRegexBuilder;
+    use PosixRegexBuilder;
 
     // FIXME: Workaround to coerce a Box<[T; N]> into a Box<[T]>. Use type
     // ascription when stabilized.
@@ -665,12 +714,10 @@ mod tests {
             .expect("error compiling regex")
     }
     // Test helper: builds the regex through the `compile` helper and runs
     // `matches` over the raw bytes of `input`, passing `None` as the second
     // argument. Returns one boxed slice of optional `(usize, usize)` pairs
     // per match.
     fn matches(regex: &str, input: &str) -> Vec<Box<[Option<(usize, usize)>]>> {
-        compile(regex)
-            .matches(input.as_bytes(), None)
+        compile(regex).matches(input.as_bytes(), None)
     }
     // Test helper: builds the regex through the `compile` helper and calls
     // `matches_exact` on the raw bytes of `input`. Yields `None` when there is
     // no match, otherwise a boxed slice of optional `(usize, usize)` pairs.
     fn matches_exact(regex: &str, input: &str) -> Option<Box<[Option<(usize, usize)>]>> {
-        compile(regex)
-            .matches_exact(input.as_bytes())
+        compile(regex).matches_exact(input.as_bytes())
     }
 
     #[test]
@@ -724,10 +771,7 @@ mod tests {
     }
     #[test]
     fn offsets() {
-        assert_eq!(
-            matches_exact("abc", "abcd"),
-            Some(abox![Some((0, 3))])
-        );
+        assert_eq!(matches_exact("abc", "abcd"), Some(abox![Some((0, 3))]));
         assert_eq!(
             matches_exact(r"[[:alpha:]]\+", "abcde12345"),
             Some(abox![Some((0, 5))])
@@ -758,7 +802,13 @@ mod tests {
         );
         assert_eq!(
             matches_exact(r"\(a \(b\) \(c\)\) \(d\)", "a b c d"),
-            Some(abox![Some((0, 7)), Some((0, 5)), Some((2, 3)), Some((4, 5)), Some((6, 7))])
+            Some(abox![
+                Some((0, 7)),
+                Some((0, 5)),
+                Some((2, 3)),
+                Some((4, 5)),
+                Some((6, 7))
+            ])
         );
         assert_eq!(
             matches_exact(r"\(.\)*", "hello"),
@@ -778,7 +828,12 @@ mod tests {
         );
         assert_eq!(
             matches_exact(r"\(a\|\(b\)\)*\(c\)", "bababac"),
-            Some(abox![Some((0, 7)), Some((5, 6)), Some((4, 5)), Some((6, 7))])
+            Some(abox![
+                Some((0, 7)),
+                Some((5, 6)),
+                Some((4, 5)),
+                Some((6, 7))
+            ])
         );
         assert_eq!(
             matches_exact(r"\(a\|\(b\)\)*\(c\)", "aaac"),
@@ -797,11 +852,21 @@ mod tests {
         );
         assert_eq!(
             matches(r"o\+", "helloooooooo woooorld, hooow are you?"),
-            vec![abox![Some((4, 12))], abox![Some((14, 18))], abox![Some((24, 27))], abox![Some((34, 35))]]
+            vec![
+                abox![Some((4, 12))],
+                abox![Some((14, 18))],
+                abox![Some((24, 27))],
+                abox![Some((34, 35))]
+            ]
         );
         assert_eq!(
             matches(r"z*", "abc"),
-            vec![abox![Some((0, 0))], abox![Some((1, 1))], abox![Some((2, 2))], abox![Some((3, 3))]]
+            vec![
+                abox![Some((0, 0))],
+                abox![Some((1, 1))],
+                abox![Some((2, 2))],
+                abox![Some((3, 3))]
+            ]
         );
     }
     #[test]
@@ -895,15 +960,18 @@ mod tests {
         assert!(matches_exact(
             r"\(hello \(\<.*\>\) \)*how are you \2",
             "hello world how are you world"
-        ).is_some());
+        )
+        .is_some());
         assert!(matches_exact(
             r"\(hello \(\<.*\>\) \)*how are you \2",
             "hello universe hello world how are you world"
-        ).is_some());
+        )
+        .is_some());
         assert!(matches_exact(
             r"\(hello \(\<.*\>\) \)*how are you \2",
             "hello world hello universe how are you world"
-        ).is_none());
+        )
+        .is_none());
     }
     #[test]
     fn case_insensitive() {
@@ -918,10 +986,13 @@ mod tests {
     }
     #[test]
     fn newline() {
-        assert_eq!(compile(r"^hello$")
-            .newline(true)
-            .matches(b"hi\nhello\ngreetings", None)
-            .len(), 1);
+        assert_eq!(
+            compile(r"^hello$")
+                .newline(true)
+                .matches(b"hi\nhello\ngreetings", None)
+                .len(),
+            1
+        );
         assert!(compile(r"^hello$")
             .newline(true)
             .matches(b"hi\ngood day\ngreetings", None)
@@ -950,7 +1021,10 @@ mod tests {
     // Benchmark: repeatedly runs the `matches` helper with a pattern that
     // combines nested groups and alternation, asserting each time that exactly
     // one match is reported for the sample input.
     #[bench]
     fn speed_matches(b: &mut Bencher) {
         b.iter(|| {
-            assert_eq!(matches(r"\(\(a*\|b\|c\) test\|yee\)", "oooo aaaaa test").len(), 1);
+            assert_eq!(
+                matches(r"\(\(a*\|b\|c\) test\|yee\)", "oooo aaaaa test").len(),
+                1
+            );
         })
     }
 }

+ 46 - 40
src/tree.rs

@@ -4,7 +4,7 @@ use std::prelude::*;
 use std::fmt;
 use std::ops::{Index, IndexMut};
 
-use compile::{Token, Range};
+use compile::{Range, Token};
 
 /// Identifier of a single node. It wraps a `usize`, which the builder
 /// converts back with `usize::from` to index into its node arena.
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
 pub struct NodeId(usize);
@@ -25,7 +25,7 @@ pub struct Node {
     pub range: Range,
     pub parent: Option<NodeId>,
     pub next_sibling: Option<NodeId>,
-    pub child: Option<NodeId>
+    pub child: Option<NodeId>,
 }
 impl Node {
     pub fn children<'a>(&self, arena: &'a Tree) -> NodeIter<'a> {
@@ -53,14 +53,14 @@ impl<'a> Iterator for NodeIter<'a> {
 }
 
 /// Snapshot of a `TreeBuilder`'s cursor. Produced by
 /// `TreeBuilder::checkpoint` and later handed to
 /// `TreeBuilder::start_internal_at`.
 pub struct Checkpoint {
-    cursor: Option<NodeId>
+    cursor: Option<NodeId>,
 }
 
 /// Incremental builder for a `Tree`.
 ///
 /// `arena` holds every node created so far. `parent` is recorded as the
 /// parent of each newly inserted node, and `cursor` is the node whose
 /// `next_sibling` link is set to point at the next inserted node.
 #[derive(Default)]
 pub struct TreeBuilder {
     arena: Vec<Node>,
     parent: Option<NodeId>,
-    cursor: Option<NodeId>
+    cursor: Option<NodeId>,
 }
 impl TreeBuilder {
     fn insert(&mut self, token: Token, range: Range) -> NodeId {
@@ -70,13 +70,14 @@ impl TreeBuilder {
             range,
             parent: self.parent,
             next_sibling: None,
-            child: None
+            child: None,
         });
         if let Some(prev) = self.cursor {
             self.arena[usize::from(prev)].next_sibling = Some(id);
         }
         if let Some(parent) = self.parent {
-            self.arena[usize::from(parent)].child = self.arena[usize::from(parent)].child.or(Some(id));
+            self.arena[usize::from(parent)].child =
+                self.arena[usize::from(parent)].child.or(Some(id));
         }
         id
     }
@@ -89,11 +90,13 @@ impl TreeBuilder {
     }
     /// Closes the internal node currently being filled: the cursor moves to
     /// that node, and `parent` becomes the node's own parent (or `None` when
     /// there is no current parent or it has no parent itself).
     pub fn finish_internal(&mut self) {
         self.cursor = self.parent;
-        self.parent = self.parent.and_then(|parent| self.arena[usize::from(parent)].parent);
+        self.parent = self
+            .parent
+            .and_then(|parent| self.arena[usize::from(parent)].parent);
     }
     /// Captures the builder's current cursor in a `Checkpoint` without
     /// modifying the builder.
     pub fn checkpoint(&self) -> Checkpoint {
         Checkpoint {
-            cursor: self.cursor
+            cursor: self.cursor,
         }
     }
     pub fn start_internal_at(&mut self, checkpoint: Checkpoint, token: Token, range: Range) {
@@ -104,7 +107,7 @@ impl TreeBuilder {
                 range,
                 parent: self.parent,
                 next_sibling: None,
-                child: self.arena[usize::from(from)].next_sibling
+                child: self.arena[usize::from(from)].next_sibling,
             });
             self.arena[usize::from(from)].next_sibling = Some(id);
             id
@@ -115,7 +118,7 @@ impl TreeBuilder {
                 range,
                 parent: self.parent,
                 next_sibling: None,
-                child: self.arena[usize::from(parent)].child
+                child: self.arena[usize::from(parent)].child,
             });
             self.arena[usize::from(parent)].child = Some(id);
             id
@@ -126,7 +129,7 @@ impl TreeBuilder {
                 range,
                 parent: None,
                 next_sibling: None,
-                child: self.cursor
+                child: self.cursor,
             });
             id
         };
@@ -146,7 +149,7 @@ impl TreeBuilder {
 
         Tree {
             arena: self.arena.into_boxed_slice(),
-            root: cursor
+            root: cursor,
         }
     }
 }
@@ -154,7 +157,7 @@ impl TreeBuilder {
 /// Finished tree: `arena` stores all nodes in a boxed slice and `root` is
 /// the `NodeId` of the root node. Nodes are looked up by indexing the tree
 /// with a `NodeId`.
 #[derive(Clone)]
 pub struct Tree {
     pub arena: Box<[Node]>,
-    pub root: NodeId
+    pub root: NodeId,
 }
 impl Index<NodeId> for Tree {
     type Output = Node;
@@ -182,7 +185,7 @@ impl fmt::Debug for Tree {
                 while me.map(|me| me.next_sibling.is_none()).unwrap_or(false) {
                     match nested.checked_sub(1) {
                         Some(new) => nested = new,
-                        None => break 'outer
+                        None => break 'outer,
                     }
                     me = me.unwrap().parent.map(|id| &self[id]);
                 }
@@ -209,7 +212,10 @@ mod tests {
                 parent = Some(id);
             } else {
                 let mut node = Some(id);
-                while node.map(|node| tree[node].next_sibling.is_none()).unwrap_or(false) {
+                while node
+                    .map(|node| tree[node].next_sibling.is_none())
+                    .unwrap_or(false)
+                {
                     node = tree[node.unwrap()].parent;
                 }
                 next = node.and_then(|node| tree[node].next_sibling);
@@ -222,17 +228,17 @@ mod tests {
     fn simple_builder() {
         let mut builder = TreeBuilder::default();
         builder.start_internal(Token::Root, Range(1, Some(1)));
-            builder.start_internal(Token::Alternative, Range(1, Some(1)));
-                builder.leaf(Token::Start, Range(1, Some(1)));
-                builder.start_internal(Token::Group(1), Range(1, Some(1)));
-                    builder.start_internal(Token::Alternative, Range(1, Some(1)));
-                        builder.leaf(Token::Any, Range(1, Some(1)));
-                    builder.finish_internal();
-                builder.finish_internal();
-            builder.finish_internal();
-            builder.start_internal(Token::Alternative, Range(1, Some(1)));
-                builder.leaf(Token::End, Range(1, Some(1)));
-            builder.finish_internal();
+        builder.start_internal(Token::Alternative, Range(1, Some(1)));
+        builder.leaf(Token::Start, Range(1, Some(1)));
+        builder.start_internal(Token::Group(1), Range(1, Some(1)));
+        builder.start_internal(Token::Alternative, Range(1, Some(1)));
+        builder.leaf(Token::Any, Range(1, Some(1)));
+        builder.finish_internal();
+        builder.finish_internal();
+        builder.finish_internal();
+        builder.start_internal(Token::Alternative, Range(1, Some(1)));
+        builder.leaf(Token::End, Range(1, Some(1)));
+        builder.finish_internal();
         builder.finish_internal();
 
         let tree = builder.finish();
@@ -256,20 +262,20 @@ Root 1..1
     fn builder_checkpoint() {
         let mut builder = TreeBuilder::default();
         builder.start_internal(Token::Root, Range(1, Some(1)));
-            let mut alternation = builder.checkpoint();
-                builder.leaf(Token::Start, Range(1, Some(1)));
-                let group = builder.checkpoint();
-                    builder.start_internal(Token::Alternative, Range(1, Some(1)));
-                        builder.leaf(Token::Any, Range(1, Some(1)));
-                    builder.finish_internal();
-                builder.start_internal_at(group, Token::Group(1), Range(1, Some(1)));
-                builder.finish_internal();
-            builder.start_internal_at(alternation, Token::Alternative, Range(1, Some(1)));
-            builder.finish_internal();
-            alternation = builder.checkpoint();
-                builder.leaf(Token::End, Range(1, Some(1)));
-            builder.start_internal_at(alternation, Token::Alternative, Range(1, Some(1)));
-            builder.finish_internal();
+        let mut alternation = builder.checkpoint();
+        builder.leaf(Token::Start, Range(1, Some(1)));
+        let group = builder.checkpoint();
+        builder.start_internal(Token::Alternative, Range(1, Some(1)));
+        builder.leaf(Token::Any, Range(1, Some(1)));
+        builder.finish_internal();
+        builder.start_internal_at(group, Token::Group(1), Range(1, Some(1)));
+        builder.finish_internal();
+        builder.start_internal_at(alternation, Token::Alternative, Range(1, Some(1)));
+        builder.finish_internal();
+        alternation = builder.checkpoint();
+        builder.leaf(Token::End, Range(1, Some(1)));
+        builder.start_internal_at(alternation, Token::Alternative, Range(1, Some(1)));
+        builder.finish_internal();
         builder.finish_internal();
 
         let tree = builder.finish();