Browse Source

Re-use posix-regex matcher in fnmatch

jD91mZM2 6 years ago
parent
commit
d252838496
2 changed files with 108 additions and 134 deletions
  1. 1 1
      posix-regex
  2. 107 133
      src/header/fnmatch/mod.rs

+ 1 - 1
posix-regex

@@ -1 +1 @@
-Subproject commit eccf84478c7a60a2869bbe438cb1d2507c9f45b2
+Subproject commit bf7d93bb7aba406dc135a7a9eb031371e7739fe6

+ 107 - 133
src/header/fnmatch/mod.rs

@@ -1,8 +1,14 @@
 //! fnmatch implementation
 
+use alloc::borrow::Cow;
 use alloc::vec::Vec;
+use core::slice;
 
 use platform::types::*;
+use posix_regex::PosixRegex;
+use posix_regex::compile::{Collation, Token, Range};
+
+const ONCE: Range = Range(1, Some(1));
 
 pub const FNM_NOMATCH: c_int = 1;
 
@@ -10,82 +16,105 @@ pub const FNM_NOESCAPE: c_int = 1;
 pub const FNM_PATHNAME: c_int = 2;
 pub const FNM_PERIOD: c_int = 4;
 pub const FNM_CASEFOLD: c_int = 8;
+// TODO: FNM_EXTMATCH
 
-#[derive(Debug)]
-enum Token {
-    Any,
-    Char(u8),
-    Match(bool, Vec<u8>),
-    Wildcard,
-    // TODO: FNM_EXTMATCH, which is basically a whole another custom regex
-    // format that's ambigious and ugh. The C standard library is really bloaty
-    // and I sure hope we can get away with delaying this as long as possible.
-    // If you need to implement this, you can contact jD91mZM2 for assistance
-    // in reading this code in case it's ugly.
-}
-
-unsafe fn next_token(pattern: &mut *const c_char, flags: c_int) -> Option<Token> {
-    let c = **pattern as u8;
-    if c == 0 {
-        return None;
-    }
-    *pattern = pattern.offset(1);
-    Some(match c {
-        b'\\' if flags & FNM_NOESCAPE == FNM_NOESCAPE => {
-            let c = **pattern as u8;
-            if c == 0 {
-                // Trailing backslash. Maybe error here?
-                return None;
-            }
-            *pattern = pattern.offset(1);
-            Token::Char(c)
+unsafe fn tokenize(mut pattern: *const u8, flags: c_int) -> Vec<(Token, Range)> {
+    fn any(leading: bool, flags: c_int) -> Token {
+        let mut list = Vec::new();
+        if flags & FNM_PATHNAME == FNM_PATHNAME {
+            list.push(Collation::Char(b'/'))
+        }
+        if leading && flags & FNM_PERIOD == FNM_PERIOD {
+            list.push(Collation::Char(b'.'))
         }
-        b'?' => Token::Any,
-        b'*' => Token::Wildcard,
-        b'[' => {
-            let mut matches = Vec::new();
-            let invert = if **pattern as u8 == b'!' {
-                *pattern = pattern.offset(1);
-                true
-            } else {
-                false
-            };
+        Token::OneOf { invert: true, list }
+    }
+    fn can_push(leading: bool, flags: c_int, c: u8) -> bool {
+        (c != b'/' || flags & FNM_PATHNAME != FNM_PATHNAME)
+            && (c != b'.' || !leading || flags & FNM_PERIOD != FNM_PERIOD)
+    }
+    fn is_leading(flags: c_int, c: u8) -> bool {
+        c == b'/' && flags & FNM_PATHNAME == FNM_PATHNAME
+    }
+
+    let mut tokens = Vec::new();
+    let mut leading = true;
+
+    while *pattern != 0 {
+        let was_leading = leading;
+        leading = false;
 
-            loop {
-                let mut c = **pattern as u8;
+        let c = *pattern;
+        pattern = pattern.offset(1);
+
+        tokens.push(match c {
+            b'\\' if flags & FNM_NOESCAPE == FNM_NOESCAPE => {
+                let c = *pattern;
                 if c == 0 {
+                    // Trailing backslash. Maybe error here?
                     break;
                 }
-                *pattern = pattern.offset(1);
-                match c {
-                    b']' => break,
-                    b'\\' => {
-                        c = **pattern as u8;
-                        *pattern = pattern.offset(1);
-                        if c == 0 {
-                            // Trailing backslash. Maybe error?
-                            break;
+                pattern = pattern.offset(1);
+                leading = is_leading(flags, c);
+                (Token::Char(c), ONCE)
+            },
+            b'?' => (any(was_leading, flags), ONCE),
+            b'*' => (any(was_leading, flags), Range(0, None)),
+            b'[' => {
+                let mut list: Vec<Collation> = Vec::new();
+                let invert = if *pattern == b'!' {
+                    pattern = pattern.offset(1);
+                    true
+                } else {
+                    false
+                };
+
+                loop {
+                    let mut c = *pattern;
+                    if c == 0 {
+                        break;
+                    }
+                    pattern = pattern.offset(1);
+                    match c {
+                        b']' => break,
+                        b'\\' => {
+                            c = *pattern;
+                            pattern = pattern.offset(1);
+                            if c == 0 {
+                                // Trailing backslash. Maybe error?
+                                break;
+                            }
                         }
+                        _ => (),
                     }
-                    _ => (),
-                }
-                if matches.len() >= 2 && matches[matches.len() - 1] == b'-' {
-                    let len = matches.len();
-                    let start = matches[len - 2];
-                    matches.drain(len - 2..);
-                    // Exclusive range because we'll push C later
-                    for c in start..c {
-                        matches.push(c);
+                    if *pattern == b'-' && *pattern.offset(1) != 0 {
+                        let end = *pattern.offset(1);
+                        pattern = pattern.offset(2);
+                        for c in c..=end {
+                            if can_push(was_leading, flags, c) {
+                                list.push(Collation::Char(c));
+                            }
+                        }
+                    } else {
+                        if can_push(was_leading, flags, c) {
+                            list.push(Collation::Char(c));
+                        }
                     }
                 }
-                matches.push(c);
-            }
-            // Otherwise, there was no closing ]. Maybe error?
+                // Otherwise, there was no closing ]. Maybe error?
 
-            Token::Match(invert, matches)
-        }
-        c => Token::Char(c),
-    })
+                (Token::OneOf {
+                    invert,
+                    list
+                }, ONCE)
+            }
+            c => {
+                leading = is_leading(flags, c);
+                (Token::Char(c), ONCE)
+            }
+        })
+    }
+    tokens
 }
 
 #[no_mangle]
@@ -94,75 +123,20 @@ pub unsafe extern "C" fn fnmatch(
     mut input: *const c_char,
     flags: c_int,
 ) -> c_int {
-    let pathname = flags & FNM_PATHNAME == FNM_PATHNAME;
-    let casefold = flags & FNM_CASEFOLD == FNM_CASEFOLD;
-
-    let mut leading = true;
-
-    loop {
-        if *input == 0 {
-            return if *pattern == 0 { 0 } else { FNM_NOMATCH };
-        }
-        if leading && flags & FNM_PERIOD == FNM_PERIOD {
-            if *input as u8 == b'.' && *pattern as u8 != b'.' {
-                return FNM_NOMATCH;
-            }
-        }
-        leading = false;
-        match next_token(&mut pattern, flags) {
-            Some(Token::Any) => {
-                if pathname && *input as u8 == b'/' {
-                    return FNM_NOMATCH;
-                }
-                input = input.offset(1);
-            }
-            Some(Token::Char(c)) => {
-                let mut a = *input as u8;
-                if casefold && a >= b'a' && a <= b'z' {
-                    a -= b'a' - b'A';
-                }
-                let mut b = c;
-                if casefold && b >= b'a' && b <= b'z' {
-                    b -= b'a' - b'A';
-                }
-                if a != b {
-                    return FNM_NOMATCH;
-                }
-                if pathname && a == b'/' {
-                    leading = true;
-                }
-                input = input.offset(1);
-            }
-            Some(Token::Match(invert, matches)) => {
-                if (pathname && *input as u8 == b'/') || matches.contains(&(*input as u8)) == invert
-                {
-                    // Found it, but it's inverted! Or vise versa.
-                    return FNM_NOMATCH;
-                }
-                input = input.offset(1);
-            }
-            Some(Token::Wildcard) => {
-                loop {
-                    let c = *input as u8;
-                    if c == 0 {
-                        return if *pattern == 0 { 0 } else { FNM_NOMATCH };
-                    }
+    let mut len = 0;
+    while *input.offset(len) != 0 {
+        len += 1;
+    }
+    let input = slice::from_raw_parts(input as *const u8, len as usize);
 
-                    let ret = fnmatch(pattern, input, flags);
-                    if ret == FNM_NOMATCH {
-                        input = input.offset(1);
-                    } else {
-                        // Either an error or a match. Forward the return.
-                        return ret;
-                    }
+    let mut tokens = tokenize(pattern as *const u8, flags);
+    tokens.push((Token::End, ONCE));
 
-                    if pathname && c == b'/' {
-                        // End of segment, no match yet
-                        return FNM_NOMATCH;
-                    }
-                }
-            }
-            None => return FNM_NOMATCH, // Pattern ended but there's still some input
-        }
+    if PosixRegex::new(Cow::Owned(vec![tokens]))
+        .case_insensitive(flags & FNM_CASEFOLD == FNM_CASEFOLD)
+            .matches_exact(input).is_some() {
+        0
+    } else {
+        FNM_NOMATCH
     }
 }