mod.rs 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. //! regex.h implementation, following http://pubs.opengroup.org/onlinepubs/7908799/xsh/regex.h.html
  2. use crate::{header::string::strlen, platform::types::*};
  3. use alloc::{borrow::Cow, vec::Vec};
  4. use core::{mem, ptr, slice};
  5. use posix_regex::{
  6. compile::{Error as CompileError, Range, Token},
  7. PosixRegex, PosixRegexBuilder,
  8. };
  9. pub type regoff_t = size_t;
  10. #[repr(C)]
  11. pub struct regex_t {
  12. // Can't be a normal Vec<T> because then the struct size won't be known
  13. // from C.
  14. ptr: *mut c_void,
  15. length: size_t,
  16. capacity: size_t,
  17. cflags: c_int,
  18. re_nsub: size_t,
  19. }
  20. #[repr(C)]
  21. pub struct regmatch_t {
  22. rm_so: regoff_t,
  23. rm_eo: regoff_t,
  24. }
  25. pub const REG_EXTENDED: c_int = 1;
  26. pub const REG_ICASE: c_int = 2;
  27. pub const REG_NOSUB: c_int = 4;
  28. pub const REG_NEWLINE: c_int = 8;
  29. pub const REG_NOTBOL: c_int = 16;
  30. pub const REG_NOTEOL: c_int = 32;
  31. pub const REG_NOMATCH: c_int = 1;
  32. pub const REG_BADPAT: c_int = 2;
  33. pub const REG_ECOLLATE: c_int = 3;
  34. pub const REG_ECTYPE: c_int = 4;
  35. pub const REG_EESCAPE: c_int = 5;
  36. pub const REG_ESUBREG: c_int = 6;
  37. pub const REG_EBRACK: c_int = 7;
  38. pub const REG_ENOSYS: c_int = 8;
  39. pub const REG_EPAREN: c_int = 9;
  40. pub const REG_EBRACE: c_int = 10;
  41. pub const REG_BADBR: c_int = 11;
  42. pub const REG_ERANGE: c_int = 12;
  43. pub const REG_ESPACE: c_int = 13;
  44. pub const REG_BADRPT: c_int = 14;
  45. #[no_mangle]
  46. #[linkage = "weak"] // redefined in GIT
  47. pub unsafe extern "C" fn regcomp(out: *mut regex_t, pat: *const c_char, cflags: c_int) -> c_int {
  48. if cflags & REG_EXTENDED == REG_EXTENDED {
  49. return REG_ENOSYS;
  50. }
  51. let pat = slice::from_raw_parts(pat as *const u8, strlen(pat));
  52. let res = PosixRegexBuilder::new(pat)
  53. .with_default_classes()
  54. .compile_tokens();
  55. match res {
  56. Ok(mut branches) => {
  57. let re_nsub = PosixRegex::new(Cow::Borrowed(&branches)).count_groups();
  58. *out = regex_t {
  59. ptr: branches.as_mut_ptr() as *mut c_void,
  60. length: branches.len(),
  61. capacity: branches.capacity(),
  62. cflags,
  63. re_nsub,
  64. };
  65. mem::forget(branches);
  66. 0
  67. }
  68. Err(CompileError::EmptyRepetition)
  69. | Err(CompileError::IntegerOverflow)
  70. | Err(CompileError::IllegalRange) => REG_BADBR,
  71. Err(CompileError::UnclosedRepetition) => REG_EBRACE,
  72. Err(CompileError::LeadingRepetition) => REG_BADRPT,
  73. Err(CompileError::UnknownCollation) => REG_ECOLLATE,
  74. Err(CompileError::UnknownClass(_)) => REG_ECTYPE,
  75. Err(_) => REG_BADPAT,
  76. }
  77. }
  78. #[no_mangle]
  79. #[linkage = "weak"] // redefined in GIT
  80. pub unsafe extern "C" fn regfree(regex: *mut regex_t) {
  81. Vec::from_raw_parts(
  82. (*regex).ptr as *mut Vec<(Token, Range)>,
  83. (*regex).length,
  84. (*regex).capacity,
  85. );
  86. }
  87. #[no_mangle]
  88. #[linkage = "weak"] // redefined in GIT
  89. pub unsafe extern "C" fn regexec(
  90. regex: *const regex_t,
  91. input: *const c_char,
  92. nmatch: size_t,
  93. pmatch: *mut regmatch_t,
  94. eflags: c_int,
  95. ) -> c_int {
  96. if eflags & REG_EXTENDED == REG_EXTENDED {
  97. return REG_ENOSYS;
  98. }
  99. let regex = &*regex;
  100. // Allow specifying a compiler argument to the executor and vise versa
  101. // because why not?
  102. let flags = regex.cflags | eflags;
  103. let input = slice::from_raw_parts(input as *const u8, strlen(input));
  104. let branches = slice::from_raw_parts(regex.ptr as *const Vec<(Token, Range)>, regex.length);
  105. let matches = PosixRegex::new(Cow::Borrowed(&branches))
  106. .case_insensitive(flags & REG_ICASE == REG_ICASE)
  107. .newline(flags & REG_NEWLINE == REG_NEWLINE)
  108. .no_start(flags & REG_NOTBOL == REG_NOTBOL)
  109. .no_end(flags & REG_NOTEOL == REG_NOTEOL)
  110. .matches(input, Some(1));
  111. if !matches.is_empty() && eflags & REG_NOSUB != REG_NOSUB && !pmatch.is_null() && nmatch > 0 {
  112. let first = &matches[0];
  113. for i in 0..nmatch {
  114. let (start, end) = first.get(i).and_then(|&range| range).unwrap_or((!0, !0));
  115. *pmatch.add(i) = regmatch_t {
  116. rm_so: start,
  117. rm_eo: end,
  118. };
  119. }
  120. }
  121. if matches.is_empty() {
  122. REG_NOMATCH
  123. } else {
  124. 0
  125. }
  126. }
  127. #[no_mangle]
  128. #[linkage = "weak"] // redefined in GIT
  129. pub extern "C" fn regerror(
  130. code: c_int,
  131. _regex: *const regex_t,
  132. out: *mut c_char,
  133. max: size_t,
  134. ) -> size_t {
  135. let string = match code {
  136. 0 => "No error\0",
  137. REG_NOMATCH => "No match\0",
  138. REG_BADPAT => "Invalid regexp\0",
  139. REG_ECOLLATE => "Unknown collating element\0",
  140. REG_ECTYPE => "Unknown character class name\0",
  141. REG_EESCAPE => "Trailing backslash\0",
  142. REG_ESUBREG => "Invalid back reference\0",
  143. REG_EBRACK => "Missing ']'\0",
  144. REG_ENOSYS => "Unsupported operation\0",
  145. REG_EPAREN => "Missing ')'\0",
  146. REG_EBRACE => "Missing '}'\0",
  147. REG_BADBR => "Invalid contents of {}\0",
  148. REG_ERANGE => "Invalid character range\0",
  149. REG_ESPACE => "Out of memory\0",
  150. REG_BADRPT => "Repetition not preceded by valid expression\0",
  151. _ => "Unknown error\0",
  152. };
  153. unsafe {
  154. ptr::copy_nonoverlapping(
  155. string.as_ptr(),
  156. out as *mut u8,
  157. string.len().min(max as usize),
  158. );
  159. }
  160. string.len()
  161. }