|
@@ -10,9 +10,39 @@ use crate::{
|
|
|
|
|
|
use super::mbstate_t;
|
|
|
|
|
|
+// Based on
|
|
|
+// https://github.com/rust-lang/rust/blob/f24ce9b/library/core/src/str/validations.rs#L232-L257,
|
|
|
+// vendored here because the `pub use` that re-exported it from `core::str` was removed.
|
|
|
+
|
|
|
+// Indexed by a first byte: length (1-4) of the UTF-8 sequence it starts; 0 for 0x80-0xC1 and 0xF5-0xFF, which cannot start one. See https://tools.ietf.org/html/rfc3629
|
|
|
+static UTF8_CHAR_WIDTH: [u8; 256] = [
|
|
|
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
|
+ 1, // 0x1F
|
|
|
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
|
+ 1, // 0x3F
|
|
|
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
|
+ 1, // 0x5F
|
|
|
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
|
+ 1, // 0x7F
|
|
|
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
+ 0, // 0x9F
|
|
|
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
+ 0, // 0xBF
|
|
|
+ 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
|
+ 2, // 0xDF
|
|
|
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xEF
|
|
|
+ 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xFF
|
|
|
+];
|
|
|
+
|
|
|
+// Given the first byte `b` of a UTF-8 character, returns its total length in bytes as listed in `UTF8_CHAR_WIDTH` (1-4, or 0 where the table holds 0).
|
|
|
+#[inline]
|
|
|
+fn utf8_char_width(b: u8) -> usize {
|
|
|
+ UTF8_CHAR_WIDTH[usize::from(b)].into()
|
|
|
+}
|
|
|
+
|
|
|
// It's guaranteed that we don't have any null pointers here
|
|
|
pub unsafe fn mbrtowc(pwc: *mut wchar_t, s: *const c_char, n: usize, ps: *mut mbstate_t) -> usize {
|
|
|
- let size = str::utf8_char_width(*s as u8);
|
|
|
+ let size = utf8_char_width(*s as u8);
|
|
|
if size > n {
|
|
|
platform::errno = errno::EILSEQ;
|
|
|
return -2isize as usize;
|