Jooyung Han | a7cd238 | 2021-07-16 01:22:24 +0000 | [diff] [blame] | 1 | use std::io::{self, Read}; |
| 2 | use std::str; |
| 3 | use std::fmt; |
| 4 | |
/// Failure modes of reading a single UTF-8 encoded character from a byte stream.
#[derive(Debug)]
pub enum CharReadError {
    /// The stream ended after some, but not all, bytes of a character were read.
    UnexpectedEof,
    /// The bytes that were read do not form valid UTF-8.
    Utf8(str::Utf8Error),
    /// The underlying reader reported an error.
    Io(io::Error),
}
| 11 | |
| 12 | impl From<str::Utf8Error> for CharReadError { |
| 13 | fn from(e: str::Utf8Error) -> CharReadError { |
| 14 | CharReadError::Utf8(e) |
| 15 | } |
| 16 | } |
| 17 | |
| 18 | impl From<io::Error> for CharReadError { |
| 19 | fn from(e: io::Error) -> CharReadError { |
| 20 | CharReadError::Io(e) |
| 21 | } |
| 22 | } |
| 23 | |
| 24 | impl fmt::Display for CharReadError { |
| 25 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| 26 | use self::CharReadError::*; |
| 27 | match *self { |
| 28 | UnexpectedEof => write!(f, "unexpected end of stream"), |
| 29 | Utf8(ref e) => write!(f, "UTF-8 decoding error: {}", e), |
| 30 | Io(ref e) => write!(f, "I/O error: {}", e) |
| 31 | } |
| 32 | } |
| 33 | } |
| 34 | |
| 35 | pub fn next_char_from<R: Read>(source: &mut R) -> Result<Option<char>, CharReadError> { |
| 36 | const MAX_CODEPOINT_LEN: usize = 4; |
| 37 | |
| 38 | let mut bytes = source.bytes(); |
| 39 | let mut buf = [0u8; MAX_CODEPOINT_LEN]; |
| 40 | let mut pos = 0; |
| 41 | |
| 42 | loop { |
| 43 | let next = match bytes.next() { |
| 44 | Some(Ok(b)) => b, |
| 45 | Some(Err(e)) => return Err(e.into()), |
| 46 | None if pos == 0 => return Ok(None), |
| 47 | None => return Err(CharReadError::UnexpectedEof) |
| 48 | }; |
| 49 | buf[pos] = next; |
| 50 | pos += 1; |
| 51 | |
| 52 | match str::from_utf8(&buf[..pos]) { |
| 53 | Ok(s) => return Ok(s.chars().next()), // always Some(..) |
| 54 | Err(_) if pos < MAX_CODEPOINT_LEN => {}, |
| 55 | Err(e) => return Err(e.into()) |
| 56 | } |
| 57 | } |
| 58 | } |
| 59 | |
#[cfg(test)]
mod tests {
    use std::io;

    use super::{next_char_from, CharReadError};

    /// Reader whose every `read` call fails with a fixed error, used to check
    /// that I/O failures are surfaced as `CharReadError::Io`.
    struct ErrorReader;

    impl io::Read for ErrorReader {
        fn read(&mut self, _buf: &mut [u8]) -> io::Result<usize> {
            Err(io::Error::new(io::ErrorKind::Other, "test error"))
        }
    }

    #[test]
    fn test_next_char_from() {
        let mut bytes: &[u8] = "correct".as_bytes(); // correct ASCII
        assert_eq!(next_char_from(&mut bytes).unwrap(), Some('c'));

        let mut bytes: &[u8] = "правильно".as_bytes(); // correct BMP
        assert_eq!(next_char_from(&mut bytes).unwrap(), Some('п'));

        let mut bytes: &[u8] = "😊".as_bytes(); // correct non-BMP
        assert_eq!(next_char_from(&mut bytes).unwrap(), Some('😊'));

        let mut bytes: &[u8] = b""; // empty
        assert_eq!(next_char_from(&mut bytes).unwrap(), None);

        let mut bytes: &[u8] = b"\xf0\x9f\x98"; // incomplete code point
        match next_char_from(&mut bytes).unwrap_err() {
            CharReadError::UnexpectedEof => {}
            e => panic!("Unexpected result: {:?}", e),
        };

        let mut bytes: &[u8] = b"\xff\x9f\x98\x32"; // invalid code point
        match next_char_from(&mut bytes).unwrap_err() {
            CharReadError::Utf8(_) => {}
            e => panic!("Unexpected result: {:?}", e),
        };

        // error during read
        let mut r = ErrorReader;
        match next_char_from(&mut r).unwrap_err() {
            // `to_string()` replaces the deprecated `Error::description()`.
            CharReadError::Io(ref e)
                if e.kind() == io::ErrorKind::Other && e.to_string() == "test error" => {}
            e => panic!("Unexpected result: {:?}", e),
        }
    }

    #[test]
    fn test_next_char_from_advances_reader() {
        // Successive calls must continue right after the previous character,
        // regardless of how many bytes each character occupies.
        let mut bytes: &[u8] = "aп😊".as_bytes();
        assert_eq!(next_char_from(&mut bytes).unwrap(), Some('a'));
        assert_eq!(next_char_from(&mut bytes).unwrap(), Some('п'));
        assert_eq!(next_char_from(&mut bytes).unwrap(), Some('😊'));
        assert_eq!(next_char_from(&mut bytes).unwrap(), None);
    }
}