|
@@ -0,0 +1,183 @@
|
|
|
|
+//! regex.h implementation, following http://pubs.opengroup.org/onlinepubs/7908799/xsh/regex.h.html
|
|
|
|
+
|
|
|
|
+use alloc::borrow::Cow;
|
|
|
|
+use alloc::boxed::Box;
|
|
|
|
+use alloc::vec::Vec;
|
|
|
|
+use core::{mem, slice, ptr};
|
|
|
|
+use header::string::strlen;
|
|
|
|
+use platform::types::*;
|
|
|
|
+use posix_regex::{PosixRegexBuilder, PosixRegex};
|
|
|
|
+use posix_regex::compile::{Error as CompileError, Token, Range};
|
|
|
|
+
|
|
|
|
+#[repr(C)]
|
|
|
|
+pub struct regex_t {
|
|
|
|
+ // Can't be a normal Vec<T> because then the struct size won't be known
|
|
|
|
+ // from C.
|
|
|
|
+ ptr: *mut c_void,
|
|
|
|
+ length: size_t,
|
|
|
|
+ capacity: size_t,
|
|
|
|
+
|
|
|
|
+ cflags: c_int,
|
|
|
|
+ re_nsub: size_t
|
|
|
|
+}
|
|
|
|
+#[repr(C)]
|
|
|
|
+pub struct regmatch_t {
|
|
|
|
+ rm_so: regoff_t,
|
|
|
|
+ rm_eo: regoff_t
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+pub const REG_EXTENDED: c_int = 1;
|
|
|
|
+pub const REG_ICASE: c_int = 2;
|
|
|
|
+pub const REG_NOSUB: c_int = 4;
|
|
|
|
+pub const REG_NEWLINE: c_int = 8;
|
|
|
|
+pub const REG_NOTBOL: c_int = 16;
|
|
|
|
+pub const REG_NOTEOL: c_int = 32;
|
|
|
|
+
|
|
|
|
+pub const REG_NOMATCH: c_int = 1;
|
|
|
|
+pub const REG_BADPAT: c_int = 2;
|
|
|
|
+pub const REG_ECOLLATE: c_int = 3;
|
|
|
|
+pub const REG_ECTYPE: c_int = 4;
|
|
|
|
+pub const REG_EESCAPE: c_int = 5;
|
|
|
|
+pub const REG_ESUBREG: c_int = 6;
|
|
|
|
+pub const REG_EBRACK: c_int = 7;
|
|
|
|
+pub const REG_ENOSYS: c_int = 8;
|
|
|
|
+pub const REG_EPAREN: c_int = 9;
|
|
|
|
+pub const REG_EBRACE: c_int = 10;
|
|
|
|
+pub const REG_BADBR: c_int = 11;
|
|
|
|
+pub const REG_ERANGE: c_int = 12;
|
|
|
|
+pub const REG_ESPACE: c_int = 13;
|
|
|
|
+pub const REG_BADRPT: c_int = 14;
|
|
|
|
+
|
|
|
|
+fn count_groups(branches: &[Vec<(Token, Range)>]) -> usize {
|
|
|
|
+ let mut count = 0;
|
|
|
|
+ for branch in branches {
|
|
|
|
+ for (token, _) in branch {
|
|
|
|
+ if let Token::Group(ref inner) = token {
|
|
|
|
+ count += 1 + count_groups(inner);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ count
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+#[no_mangle]
|
|
|
|
+pub extern "C" fn regcomp(out: *mut regex_t, pat: *const c_char, cflags: c_int) -> c_int {
|
|
|
|
+ if cflags & REG_EXTENDED == REG_EXTENDED {
|
|
|
|
+ return REG_ENOSYS;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ let pat = unsafe { slice::from_raw_parts(pat as *const u8, strlen(pat)) };
|
|
|
|
+ let res = PosixRegexBuilder::new(pat)
|
|
|
|
+ .with_default_classes()
|
|
|
|
+ .compile_tokens();
|
|
|
|
+
|
|
|
|
+ match res {
|
|
|
|
+ Ok(mut branches) => unsafe {
|
|
|
|
+ let re_nsub = count_groups(&branches);
|
|
|
|
+ *out = regex_t {
|
|
|
|
+ ptr: branches.as_mut_ptr() as *mut c_void,
|
|
|
|
+ length: branches.len(),
|
|
|
|
+ capacity: branches.capacity(),
|
|
|
|
+
|
|
|
|
+ cflags,
|
|
|
|
+ re_nsub,
|
|
|
|
+ };
|
|
|
|
+ mem::forget(branches);
|
|
|
|
+ 0
|
|
|
|
+ },
|
|
|
|
+ Err(CompileError::EmptyRepetition)
|
|
|
|
+ | Err(CompileError::IntegerOverflow)
|
|
|
|
+ | Err(CompileError::IllegalRange) => REG_BADBR,
|
|
|
|
+ Err(CompileError::UnclosedRepetition) => REG_EBRACE,
|
|
|
|
+ Err(CompileError::LeadingRepetition) => REG_BADRPT,
|
|
|
|
+ Err(CompileError::UnknownCollation) => REG_ECOLLATE,
|
|
|
|
+ Err(CompileError::UnknownClass(_)) => REG_ECTYPE,
|
|
|
|
+ Err(_) => REG_BADPAT
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+#[no_mangle]
|
|
|
|
+pub unsafe extern "C" fn regfree(regex: *mut regex_t) {
|
|
|
|
+ Vec::from_raw_parts(
|
|
|
|
+ (*regex).ptr as *mut Vec<(Token, Range)>,
|
|
|
|
+ (*regex).length,
|
|
|
|
+ (*regex).capacity
|
|
|
|
+ );
|
|
|
|
+}
|
|
|
|
+#[no_mangle]
|
|
|
|
+pub extern "C" fn regexec(regex: *const regex_t, input: *const c_char,
|
|
|
|
+ nmatch: size_t, pmatch: *mut regmatch_t, eflags: c_int) -> c_int {
|
|
|
|
+ if eflags & REG_EXTENDED == REG_EXTENDED {
|
|
|
|
+ return REG_ENOSYS;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ let regex = unsafe { &(*regex) };
|
|
|
|
+
|
|
|
|
+ // Allow specifying a compiler argument to the executor and vise versa
|
|
|
|
+ // because why not?
|
|
|
|
+ let mut flags = regex.cflags | eflags;
|
|
|
|
+
|
|
|
|
+ let input = unsafe { slice::from_raw_parts(input as *const u8, strlen(input)) };
|
|
|
|
+
|
|
|
|
+ let branches = unsafe { slice::from_raw_parts(regex.ptr as *const Vec<(Token, Range)>, regex.length) };
|
|
|
|
+
|
|
|
|
+ let matches = PosixRegex::new(Cow::Borrowed(&branches))
|
|
|
|
+ .case_insensitive(flags & REG_ICASE == REG_ICASE)
|
|
|
|
+ .newline(flags & REG_NEWLINE == REG_NEWLINE)
|
|
|
|
+ .no_start(flags & REG_NOTBOL == REG_NOTBOL)
|
|
|
|
+ .no_end(flags & REG_NOTEOL == REG_NOTEOL)
|
|
|
|
+ .matches(input, Some(1));
|
|
|
|
+
|
|
|
|
+ if !matches.is_empty()
|
|
|
|
+ && eflags & REG_NOSUB != REG_NOSUB
|
|
|
|
+ && !pmatch.is_null()
|
|
|
|
+ && nmatch > 0 {
|
|
|
|
+ let first = &matches[0];
|
|
|
|
+
|
|
|
|
+ let len = first.len().min(nmatch as usize);
|
|
|
|
+ for i in 0..len {
|
|
|
|
+ let (start, end) = first[i];
|
|
|
|
+ unsafe {
|
|
|
|
+ *pmatch.offset(i as isize) = regmatch_t {
|
|
|
|
+ rm_so: start,
|
|
|
|
+ rm_eo: end
|
|
|
|
+ };
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ for i in len as isize..nmatch as isize {
|
|
|
|
+ unsafe {
|
|
|
|
+ *pmatch.offset(i) = regmatch_t {
|
|
|
|
+ rm_so: !0,
|
|
|
|
+ rm_eo: !0
|
|
|
|
+ };
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if matches.is_empty() { REG_NOMATCH } else { 0 }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+#[no_mangle]
|
|
|
|
+pub extern "C" fn regerror(code: c_int, _regex: *const regex_t, out: *mut c_char, max: c_int) {
|
|
|
|
+ let string = match code {
|
|
|
|
+ 0 => "No error\0",
|
|
|
|
+ REG_NOMATCH => "No match\0",
|
|
|
|
+ REG_BADPAT => "Invalid regexp\0",
|
|
|
|
+ REG_ECOLLATE => "Unknown collating element\0",
|
|
|
|
+ REG_ECTYPE => "Unknown character class name\0",
|
|
|
|
+ REG_EESCAPE => "Trailing backslash\0",
|
|
|
|
+ REG_ESUBREG => "Invalid back reference\0",
|
|
|
|
+ REG_EBRACK => "Missing ']'\0",
|
|
|
|
+ REG_ENOSYS => "Unsupported operation\0",
|
|
|
|
+ REG_EPAREN => "Missing ')'\0",
|
|
|
|
+ REG_EBRACE => "Missing '}'\0",
|
|
|
|
+ REG_BADBR => "Invalid contents of {}\0",
|
|
|
|
+ REG_ERANGE => "Invalid character range\0",
|
|
|
|
+ REG_ESPACE => "Out of memory\0",
|
|
|
|
+ REG_BADRPT => "Repetition not preceded by valid expression\0",
|
|
|
|
+ _ => "Unknown error\0"
|
|
|
|
+ };
|
|
|
|
+
|
|
|
|
+ unsafe {
|
|
|
|
+ ptr::copy_nonoverlapping(string.as_ptr(), out as *mut u8, string.len().min(max as usize))
|
|
|
|
+ }
|
|
|
|
+}
|