6 年之前 · 1acc2a1a32
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -148,6 +148,11 @@ name = "num-traits"
 
															 version = "0.2.6"
														
 
															 source = "registry+https://github.com/rust-lang/crates.io-index"
														
 
															+[[package]]
														
 
															+name = "posix-regex"
														
 
															+version = "0.1.0"
														
 
															+source = "registry+https://github.com/rust-lang/crates.io-index"
														
 
															+
														
 
															 [[package]]
														
 
															 name = "proc-macro2"
														
 
															 version = "0.2.3"
														
@@ -227,6 +232,7 @@ dependencies = [
 
															  "compiler_builtins 0.1.0 (git+https://github.com/rust-lang-nursery/compiler-builtins.git)",
														
 
															  "core_io 0.1.20180619",
														
 
															  "lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
														
 
															+ "posix-regex 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
														
 
															  "ralloc 1.0.0",
														
 
															  "rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)",
														
 
															  "redox_syscall 0.1.40 (git+https://gitlab.redox-os.org/redox-os/syscall.git?branch=relibc)",
														
@@ -464,6 +470,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 
															 "checksum log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b"
														
 
															 "checksum log 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d4fcce5fa49cc693c312001daf1d13411c4a5283796bac1084299ea3e567113f"
														
 
															 "checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1"
														
 
															+"checksum posix-regex 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "58b31ca4f5022c6c0a22206d63c177be2f418355db5a713db22bd901c6ac0db3"
														
 
															 "checksum proc-macro2 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "cd07deb3c6d1d9ff827999c7f9b04cdfd66b1b17ae508e14fe47b620f2282ae0"
														
 
															 "checksum quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6e920b65c65f10b2ae65c831a81a073a89edd28c7cce89475bff467ab4167a"
														
 
															 "checksum rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8356f47b32624fef5b3301c1be97e5944ecdd595409cc5da11d05f211db6cfbd"
														
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,6 +18,7 @@ cc = "1.0.17"
 
															 cbitset = "0.1.0"
														
 
															 core_io = { path = "core_io", features = ["collections"] }
														
 
															 lazy_static = { version = "*", features = ["nightly", "spin_no_std"] }
														
 
															+posix-regex = { version = "0.1", features = ["no_std"] }
														
 
															 rand = { version = "0.5.2", default-features = false }
														
 
															 va_list = { path = "va_list", features = ["no_std"] }
														
--- a/Makefile
+++ b/Makefile
@@ -53,6 +53,7 @@ libc: $(BUILD)/release/libc.a $(BUILD)/release/crt0.o $(BUILD)/release/crti.o $(
 
															 libm: $(BUILD)/openlibm/libopenlibm.a
														
 
															 sysroot: all
														
 
															+	rm -rf $@
														
 
															 	rm -rf $@.partial
														
 
															 	mkdir -p $@.partial
														
 
															 	make install DESTDIR=$@.partial
														
--- a/include/sys/types.h
+++ b/include/sys/types.h
@@ -20,6 +20,7 @@ typedef long clock_t;
 
															 typedef int clockid_t;
														
 
															 typedef void* timer_t;
														
 
															 typedef unsigned long int blkcnt_t;
														
 
															+typedef size_t regoff_t;
														
 
															 typedef unsigned char u_char, uchar;
														
 
															 typedef unsigned short u_short, ushort;
														
--- a/src/header/mod.rs
+++ b/src/header/mod.rs
@@ -14,6 +14,7 @@ pub mod netdb;
 
															 pub mod netinet_in;
														
 
															 //pub mod pthread;
														
 
															 pub mod pwd;
														
 
															+pub mod regex;
														
 
															 pub mod semaphore;
														
 
															 pub mod setjmp;
														
 
															 pub mod sgtty;
														
--- a/src/header/regex/cbindgen.toml
+++ b/src/header/regex/cbindgen.toml
@@ -0,0 +1,7 @@
 
															+sys_includes = ["sys/types.h"]
															+include_guard = "_REGEX_H" # must be unique per generated header, not the template's guard
															+language = "C"
															+style = "Type"
															+
															+[enum]
															+prefix_with_name = true
														
--- a/src/header/regex/mod.rs
+++ b/src/header/regex/mod.rs
@@ -0,0 +1,183 @@
 
															+//! regex.h implementation, following http://pubs.opengroup.org/onlinepubs/7908799/xsh/regex.h.html
														
 
															+
														
 
															+use alloc::borrow::Cow;
														
 
															+use alloc::boxed::Box;
														
 
															+use alloc::vec::Vec;
														
 
															+use core::{mem, slice, ptr};
														
 
															+use header::string::strlen;
														
 
															+use platform::types::*;
														
 
															+use posix_regex::{PosixRegexBuilder, PosixRegex};
														
 
															+use posix_regex::compile::{Error as CompileError, Token, Range};
														
 
															+
														
 
															+#[repr(C)]
															+pub struct regex_t { // Compiled pattern handle exposed to C: filled in by regcomp, released by regfree
															+    // Can't be a normal Vec<T> because then the struct size won't be known
															+    // from C.
															+    ptr: *mut c_void, // Data pointer of the compiled branch Vec (set from as_mut_ptr in regcomp)
															+    length: size_t, // Number of branches in that Vec
															+    capacity: size_t, // Allocated capacity of that Vec, needed to rebuild it in regfree
															+
															+    cflags: c_int, // Flags passed to regcomp; regexec ORs them with its own eflags
															+    re_nsub: size_t // Number of groups in the pattern, as computed by count_groups
															+}
														
 
															+#[repr(C)]
															+pub struct regmatch_t { // One match span written by regexec into the caller's pmatch array
															+    rm_so: regoff_t, // Start offset of the span; regexec stores !0 in slots that have no span
															+    rm_eo: regoff_t // End offset of the span; likewise !0 for unused slots
															+}
														
 
															+
														
 
															+pub const REG_EXTENDED: c_int = 1; // Flag: not implemented, regcomp and regexec return REG_ENOSYS when it is set
															+pub const REG_ICASE:    c_int = 2; // Flag: case-insensitive matching
															+pub const REG_NOSUB:    c_int = 4; // Flag: do not report match positions through pmatch
															+pub const REG_NEWLINE:  c_int = 8; // Flag: forwarded to PosixRegex::newline
															+pub const REG_NOTBOL:   c_int = 16; // Flag: forwarded to PosixRegex::no_start
															+pub const REG_NOTEOL:   c_int = 32; // Flag: forwarded to PosixRegex::no_end
															+
															+pub const REG_NOMATCH:  c_int = 1; // Error: regexec found no match
															+pub const REG_BADPAT:   c_int = 2; // Error: invalid regexp; also the fallback for otherwise unmapped compile errors
															+pub const REG_ECOLLATE: c_int = 3; // Error: unknown collating element
															+pub const REG_ECTYPE:   c_int = 4; // Error: unknown character class name
															+pub const REG_EESCAPE:  c_int = 5; // Error: trailing backslash
															+pub const REG_ESUBREG:  c_int = 6; // Error: invalid back reference
															+pub const REG_EBRACK:   c_int = 7; // Error: missing ']'
															+pub const REG_ENOSYS:   c_int = 8; // Error: unsupported operation (returned for REG_EXTENDED)
															+pub const REG_EPAREN:   c_int = 9; // Error: missing ')'
															+pub const REG_EBRACE:   c_int = 10; // Error: missing '}'
															+pub const REG_BADBR:    c_int = 11; // Error: invalid contents of {}
															+pub const REG_ERANGE:   c_int = 12; // Error: invalid character range
															+pub const REG_ESPACE:   c_int = 13; // Error: out of memory
															+pub const REG_BADRPT:   c_int = 14; // Error: repetition not preceded by valid expression
														
 
															+
														
 
															+/// Counts every `Token::Group` found in `branches`, including groups that
															+/// are nested inside other groups.
															+fn count_groups(branches: &[Vec<(Token, Range)>]) -> usize {
															+    branches
															+        .iter()
															+        .flat_map(|branch| branch.iter())
															+        .map(|&(ref token, _)| match *token {
															+            // A group counts once for itself plus once per group it contains.
															+            Token::Group(ref inner) => 1 + count_groups(inner),
															+            _ => 0,
															+        })
															+        .sum()
															+}
														
 
															+
														
 
															+/// Compiles the NUL-terminated pattern `pat` and stores the result in `*out`.
															+///
															+/// Returns 0 on success, otherwise one of the `REG_*` error codes.
															+/// `REG_EXTENDED` is rejected up front with `REG_ENOSYS`. On success the
															+/// compiled branches are handed over to `*out` as raw Vec parts and must be
															+/// released with `regfree`.
															+#[no_mangle]
															+pub extern "C" fn regcomp(out: *mut regex_t, pat: *const c_char, cflags: c_int) -> c_int {
															+    if cflags & REG_EXTENDED != 0 {
															+        return REG_ENOSYS;
															+    }
															+
															+    let pattern = unsafe { slice::from_raw_parts(pat as *const u8, strlen(pat)) };
															+    let compiled = PosixRegexBuilder::new(pattern)
															+        .with_default_classes()
															+        .compile_tokens();
															+
															+    // Translate compile failures into their regex.h codes and bail out early.
															+    let mut branches = match compiled {
															+        Ok(branches) => branches,
															+        Err(err) => return match err {
															+            CompileError::EmptyRepetition
															+                | CompileError::IntegerOverflow
															+                | CompileError::IllegalRange => REG_BADBR,
															+            CompileError::UnclosedRepetition => REG_EBRACE,
															+            CompileError::LeadingRepetition => REG_BADRPT,
															+            CompileError::UnknownCollation => REG_ECOLLATE,
															+            CompileError::UnknownClass(_) => REG_ECTYPE,
															+            _ => REG_BADPAT,
															+        },
															+    };
															+
															+    let re_nsub = count_groups(&branches);
															+    unsafe {
															+        *out = regex_t {
															+            ptr: branches.as_mut_ptr() as *mut c_void,
															+            length: branches.len(),
															+            capacity: branches.capacity(),
															+
															+            cflags,
															+            re_nsub,
															+        };
															+    }
															+    // The allocation is now owned by `*out`; skip the Vec destructor so it
															+    // stays alive until regfree rebuilds and drops it.
															+    mem::forget(branches);
															+    0
															+}
														
 
															+/// Releases the compiled branches that `regcomp` stored in `*regex`.
															+///
															+/// # Safety
															+///
															+/// `regex` must point to a `regex_t` whose `ptr`, `length` and `capacity`
															+/// were written by a successful `regcomp` and have not been freed since.
															+#[no_mangle]
															+pub unsafe extern "C" fn regfree(regex: *mut regex_t) {
															+    let compiled = &*regex;
															+    let branches = Vec::from_raw_parts(
															+        compiled.ptr as *mut Vec<(Token, Range)>,
															+        compiled.length,
															+        compiled.capacity,
															+    );
															+    // Dropping the rebuilt Vec frees every branch and the outer buffer.
															+    drop(branches);
															+}
														
 
															+/// Matches the NUL-terminated `input` against a pattern compiled by `regcomp`.
															+///
															+/// Returns 0 when a match is found, `REG_NOMATCH` when there is none, and
															+/// `REG_ENOSYS` when `eflags` contains `REG_EXTENDED`.
															+///
															+/// The flags given to `regcomp` and `eflags` are merged before use. When a
															+/// match is found, `pmatch` is non-null, `nmatch > 0` and `REG_NOSUB` is not
															+/// set in the merged flags, the spans of the first match are written to
															+/// `pmatch[0..nmatch]`; slots beyond the available spans are set to `!0`.
															+#[no_mangle]
															+pub extern "C" fn regexec(regex: *const regex_t, input: *const c_char,
															+                          nmatch: size_t, pmatch: *mut regmatch_t, eflags: c_int) -> c_int {
															+    if eflags & REG_EXTENDED == REG_EXTENDED {
															+        return REG_ENOSYS;
															+    }
															+
															+    let regex = unsafe { &(*regex) };
															+
															+    // Allow specifying a compiler argument to the executor and vice versa
															+    // because why not?
															+    let flags = regex.cflags | eflags;
															+
															+    let input = unsafe { slice::from_raw_parts(input as *const u8, strlen(input)) };
															+
															+    // Borrow the branches that regcomp leaked into the regex_t; they stay
															+    // owned by the regex_t until regfree.
															+    let branches = unsafe { slice::from_raw_parts(regex.ptr as *const Vec<(Token, Range)>, regex.length) };
															+
															+    let matches = PosixRegex::new(Cow::Borrowed(&branches))
															+        .case_insensitive(flags & REG_ICASE == REG_ICASE)
															+        .newline(flags & REG_NEWLINE == REG_NEWLINE)
															+        .no_start(flags & REG_NOTBOL == REG_NOTBOL)
															+        .no_end(flags & REG_NOTEOL == REG_NOTEOL)
															+        .matches(input, Some(1));
															+
															+    // REG_NOSUB is checked on the merged flags so that a pattern compiled
															+    // with REG_NOSUB never has pmatch written, like every other flag here.
															+    if !matches.is_empty()
															+            && flags & REG_NOSUB != REG_NOSUB
															+            && !pmatch.is_null()
															+            && nmatch > 0 {
															+        let first = &matches[0];
															+
															+        // Never write more spans than the caller's array can hold.
															+        let len = first.len().min(nmatch as usize);
															+        for i in 0..len {
															+            let (start, end) = first[i];
															+            unsafe {
															+                *pmatch.offset(i as isize) = regmatch_t {
															+                    rm_so: start,
															+                    rm_eo: end
															+                };
															+            }
															+        }
															+        // Mark the remaining slots as unused.
															+        for i in len as isize..nmatch as isize {
															+            unsafe {
															+                *pmatch.offset(i) = regmatch_t {
															+                    rm_so: !0,
															+                    rm_eo: !0
															+                };
															+            }
															+        }
															+    }
															+
															+    if matches.is_empty() { REG_NOMATCH } else { 0 }
															+}
														
 
															+
														
 
															+#[no_mangle]
														
 
															+pub extern "C" fn regerror(code: c_int, _regex: *const regex_t, out: *mut c_char, max: c_int) {
														
 
															+    let string = match code {
														
 
															+        0            => "No error\0",
														
 
															+        REG_NOMATCH  => "No match\0",
														
 
															+        REG_BADPAT   => "Invalid regexp\0",
														
 
															+        REG_ECOLLATE => "Unknown collating element\0",
														
 
															+        REG_ECTYPE   => "Unknown character class name\0",
														
 
															+        REG_EESCAPE  => "Trailing backslash\0",
														
 
															+        REG_ESUBREG  => "Invalid back reference\0",
														
 
															+        REG_EBRACK   => "Missing ']'\0",
														
 
															+        REG_ENOSYS   => "Unsupported operation\0",
														
 
															+        REG_EPAREN   => "Missing ')'\0",
														
 
															+        REG_EBRACE   => "Missing '}'\0",
														
 
															+        REG_BADBR    => "Invalid contents of {}\0",
														
 
															+        REG_ERANGE   => "Invalid character range\0",
														
 
															+        REG_ESPACE   => "Out of memory\0",
														
 
															+        REG_BADRPT   => "Repetition not preceded by valid expression\0",
														
 
															+        _ => "Unknown error\0"
														
 
															+    };
														
 
															+
														
 
															+    unsafe {
														
 
															+        ptr::copy_nonoverlapping(string.as_ptr(), out as *mut u8, string.len().min(max as usize))
														
 
															+    }
														
 
															+}
														
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -21,6 +21,7 @@ extern crate cbitset;
 
															 extern crate core_io;
														
 
															 #[macro_use]
														
 
															 extern crate lazy_static;
														
 
															+extern crate posix_regex;
														
 
															 extern crate rand;
														
 
															 extern crate va_list;
														
--- a/src/platform/types.rs
+++ b/src/platform/types.rs
@@ -46,6 +46,7 @@ pub type wchar_t = i32;
 
															 pub type wint_t = u32;
														
 
															 pub type wctype_t = i64;
														
 
															+pub type regoff_t = size_t;
														
 
															 pub type off_t = c_long;
														
 
															 pub type mode_t = c_int;
														
 
															 pub type time_t = c_long;
														
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -11,6 +11,7 @@ EXPECT_BINS=\
 
															 	locale \
														
 
															 	math \
														
 
															 	netdb \
														
 
															+	regex \
														
 
															 	select \
														
 
															 	setjmp \
														
 
															 	signal \
														
--- a/tests/expected/regex.stderr
+++ b/tests/expected/regex.stderr
--- a/tests/expected/regex.stdout
+++ b/tests/expected/regex.stdout
@@ -0,0 +1,3 @@
 
															+Matching group: 25 - 36
														
 
															+Matching group: 31 - 36
														
 
															+Matching group: -1 - -1
														
--- a/tests/regex.c
+++ b/tests/regex.c
@@ -0,0 +1,31 @@
 
															+#include <regex.h>
														
 
															+#include <stdio.h>
														
 
															+
														
 
															+/* Compiles a basic regex, runs it once and prints the spans of the match
															+ * and of its capture group. Returns -1 if compiling or matching fails. */
															+int main() {
															+    regex_t regex;
															+    char error_buf[256];
															+
															+    int error = regcomp(&regex, "h.llo \\(w.rld\\)", REG_ICASE);
															+    if (error) {
															+        regerror(error, &regex, error_buf, 255);
															+        error_buf[255] = 0;
															+        printf("regcomp error: %d = %s\n", error, error_buf);
															+        return -1;
															+    }
															+
															+    regmatch_t matches[3] = { 0 };
															+
															+    error = regexec(&regex, "Hey, how are you? Hello? Hallo Wurld??", 3, matches, 0);
															+
															+    if (error) {
															+        /* Report while the regex is still valid, then release it. */
															+        regerror(error, &regex, error_buf, 255);
															+        error_buf[255] = 0;
															+        printf("regexec error: %d = %s\n", error, error_buf);
															+        regfree(&regex);
															+        return -1;
															+    }
															+
															+    regfree(&regex);
															+
															+    for (int group = 0; group < 3; group += 1) {
															+        /* regoff_t is not an int here; cast so the arguments match %d. */
															+        printf("Matching group: %d - %d\n", (int) matches[group].rm_so, (int) matches[group].rm_eo);
															+    }
															+
															+    return 0;
															+}