123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183 |
- //! regex.h implementation, following http://pubs.opengroup.org/onlinepubs/7908799/xsh/regex.h.html
- use crate::{header::string::strlen, platform::types::*};
- use alloc::{borrow::Cow, vec::Vec};
- use core::{mem, ptr, slice};
- use posix_regex::{
- compile::{Error as CompileError, Range, Token},
- PosixRegex, PosixRegexBuilder,
- };
/// Offset type used for the `rm_so` / `rm_eo` fields of `regmatch_t`.
///
/// NOTE(review): this aliases the unsigned `size_t`; `regexec` reports an
/// absent group as `!0` (all bits set) rather than a signed `-1` -- confirm
/// this matches what C callers of the generated header expect.
pub type regoff_t = size_t;
/// Compiled pattern as seen from C; filled in by `regcomp`, read by
/// `regexec`, and released by `regfree`.
#[repr(C)]
pub struct regex_t {
    // Can't be a normal Vec<T> because then the struct size won't be known
    // from C.
    // The three fields below are the raw parts (data pointer, length,
    // capacity) of the branch vector produced by `regcomp`; `regfree`
    // reassembles them into a `Vec` so it can be dropped.
    ptr: *mut c_void,
    length: size_t,
    capacity: size_t,
    // Flags passed to `regcomp`; `regexec` ORs them with its own `eflags`.
    cflags: c_int,
    // Group count reported by `PosixRegex::count_groups` at compile time.
    re_nsub: size_t,
}
/// One match slot written by `regexec` into the caller's `pmatch` array.
#[repr(C)]
pub struct regmatch_t {
    // Start offset of the matched range; `!0` when the slot has no range.
    rm_so: regoff_t,
    // End offset of the matched range; `!0` when the slot has no range.
    rm_eo: regoff_t,
}
// Flag bits. `regexec` ORs the flags stored by `regcomp` with its own
// `eflags`, so all of these share a single bit space.

/// Extended syntax; both `regcomp` and `regexec` answer `REG_ENOSYS` to it.
pub const REG_EXTENDED: c_int = 1 << 0;
/// Case-insensitive matching.
pub const REG_ICASE: c_int = 1 << 1;
/// Do not report match offsets.
pub const REG_NOSUB: c_int = 1 << 2;
/// Newline-sensitive matching.
pub const REG_NEWLINE: c_int = 1 << 3;
/// The start of the input is not the beginning of a line.
pub const REG_NOTBOL: c_int = 1 << 4;
/// The end of the input is not the end of a line.
pub const REG_NOTEOL: c_int = 1 << 5;

// Status codes; `0` means success. The wording below mirrors the messages
// produced by `regerror`.

/// No match.
pub const REG_NOMATCH: c_int = 1;
/// Invalid regexp.
pub const REG_BADPAT: c_int = 2;
/// Unknown collating element.
pub const REG_ECOLLATE: c_int = 3;
/// Unknown character class name.
pub const REG_ECTYPE: c_int = 4;
/// Trailing backslash.
pub const REG_EESCAPE: c_int = 5;
/// Invalid back reference.
pub const REG_ESUBREG: c_int = 6;
/// Missing ']'.
pub const REG_EBRACK: c_int = 7;
/// Unsupported operation.
pub const REG_ENOSYS: c_int = 8;
/// Missing ')'.
pub const REG_EPAREN: c_int = 9;
/// Missing '}'.
pub const REG_EBRACE: c_int = 10;
/// Invalid contents of {}.
pub const REG_BADBR: c_int = 11;
/// Invalid character range.
pub const REG_ERANGE: c_int = 12;
/// Out of memory.
pub const REG_ESPACE: c_int = 13;
/// Repetition not preceded by valid expression.
pub const REG_BADRPT: c_int = 14;
- #[no_mangle]
- #[linkage = "weak"] // redefined in GIT
- pub unsafe extern "C" fn regcomp(out: *mut regex_t, pat: *const c_char, cflags: c_int) -> c_int {
- if cflags & REG_EXTENDED == REG_EXTENDED {
- return REG_ENOSYS;
- }
- let pat = slice::from_raw_parts(pat as *const u8, strlen(pat));
- let res = PosixRegexBuilder::new(pat)
- .with_default_classes()
- .compile_tokens();
- match res {
- Ok(mut branches) => {
- let re_nsub = PosixRegex::new(Cow::Borrowed(&branches)).count_groups();
- *out = regex_t {
- ptr: branches.as_mut_ptr() as *mut c_void,
- length: branches.len(),
- capacity: branches.capacity(),
- cflags,
- re_nsub,
- };
- mem::forget(branches);
- 0
- }
- Err(CompileError::EmptyRepetition)
- | Err(CompileError::IntegerOverflow)
- | Err(CompileError::IllegalRange) => REG_BADBR,
- Err(CompileError::UnclosedRepetition) => REG_EBRACE,
- Err(CompileError::LeadingRepetition) => REG_BADRPT,
- Err(CompileError::UnknownCollation) => REG_ECOLLATE,
- Err(CompileError::UnknownClass(_)) => REG_ECTYPE,
- Err(_) => REG_BADPAT,
- }
- }
- #[no_mangle]
- #[linkage = "weak"] // redefined in GIT
- pub unsafe extern "C" fn regfree(regex: *mut regex_t) {
- Vec::from_raw_parts(
- (*regex).ptr as *mut Vec<(Token, Range)>,
- (*regex).length,
- (*regex).capacity,
- );
- }
- #[no_mangle]
- #[linkage = "weak"] // redefined in GIT
- pub unsafe extern "C" fn regexec(
- regex: *const regex_t,
- input: *const c_char,
- nmatch: size_t,
- pmatch: *mut regmatch_t,
- eflags: c_int,
- ) -> c_int {
- if eflags & REG_EXTENDED == REG_EXTENDED {
- return REG_ENOSYS;
- }
- let regex = &*regex;
- // Allow specifying a compiler argument to the executor and vise versa
- // because why not?
- let flags = regex.cflags | eflags;
- let input = slice::from_raw_parts(input as *const u8, strlen(input));
- let branches = slice::from_raw_parts(regex.ptr as *const Vec<(Token, Range)>, regex.length);
- let matches = PosixRegex::new(Cow::Borrowed(&branches))
- .case_insensitive(flags & REG_ICASE == REG_ICASE)
- .newline(flags & REG_NEWLINE == REG_NEWLINE)
- .no_start(flags & REG_NOTBOL == REG_NOTBOL)
- .no_end(flags & REG_NOTEOL == REG_NOTEOL)
- .matches(input, Some(1));
- if !matches.is_empty() && eflags & REG_NOSUB != REG_NOSUB && !pmatch.is_null() && nmatch > 0 {
- let first = &matches[0];
- for i in 0..nmatch {
- let (start, end) = first.get(i).and_then(|&range| range).unwrap_or((!0, !0));
- *pmatch.add(i) = regmatch_t {
- rm_so: start,
- rm_eo: end,
- };
- }
- }
- if matches.is_empty() {
- REG_NOMATCH
- } else {
- 0
- }
- }
- #[no_mangle]
- #[linkage = "weak"] // redefined in GIT
- pub extern "C" fn regerror(
- code: c_int,
- _regex: *const regex_t,
- out: *mut c_char,
- max: size_t,
- ) -> size_t {
- let string = match code {
- 0 => "No error\0",
- REG_NOMATCH => "No match\0",
- REG_BADPAT => "Invalid regexp\0",
- REG_ECOLLATE => "Unknown collating element\0",
- REG_ECTYPE => "Unknown character class name\0",
- REG_EESCAPE => "Trailing backslash\0",
- REG_ESUBREG => "Invalid back reference\0",
- REG_EBRACK => "Missing ']'\0",
- REG_ENOSYS => "Unsupported operation\0",
- REG_EPAREN => "Missing ')'\0",
- REG_EBRACE => "Missing '}'\0",
- REG_BADBR => "Invalid contents of {}\0",
- REG_ERANGE => "Invalid character range\0",
- REG_ESPACE => "Out of memory\0",
- REG_BADRPT => "Repetition not preceded by valid expression\0",
- _ => "Unknown error\0",
- };
- unsafe {
- ptr::copy_nonoverlapping(
- string.as_ptr(),
- out as *mut u8,
- string.len().min(max as usize),
- );
- }
- string.len()
- }
|