utf8-chars-3.0.6/.cargo_vcs_info.json0000644000000001361046102023000130500ustar { "git": { "sha1": "340282314101cf9e6c04452b1ec1cea60b4d7281" }, "path_in_vcs": "" }utf8-chars-3.0.6/.github/workflows/ci.yml000064400000000000000000000013761046102023000163400ustar 00000000000000name: CI on: push: branches: [ master ] pull_request: branches: [ master ] schedule: - cron: '0 0 * * *' env: CARGO_TERM_COLOR: always jobs: main: strategy: fail-fast: false matrix: os: [ubuntu-latest, windows-latest] rust: ["1.70", stable, beta, nightly] runs-on: ${{ matrix.os }} steps: - uses: actions-rs/toolchain@v1 with: toolchain: ${{ matrix.rust }} override: true - run: "rustup component add clippy" - uses: actions/checkout@v2 - run: "cargo build --verbose" - run: "cargo test --verbose --tests" - run: "cargo test --verbose --doc" - run: "cargo doc --verbose" - run: "cargo build --verbose --release" - run: "cargo clippy --verbose" utf8-chars-3.0.6/.gitignore000064400000000000000000000000471046102023000136070ustar 00000000000000/target/ Cargo.lock **/*.rs.bk /.idea/ utf8-chars-3.0.6/Cargo.lock0000644000000155471046102023000110370ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "aho-corasick" version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] [[package]] name = "arrayvec" version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "cfg-if" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "env_logger" version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" dependencies = [ "log", "regex", ] [[package]] name = "getrandom" version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "libc", "wasi", ] [[package]] name = "getrandom" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "libc", "r-efi", "wasip2", ] [[package]] name = "libc" version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" [[package]] name = "log" version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" [[package]] name = "memchr" version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "ppv-lite86" version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ "zerocopy", ] [[package]] name = "proc-macro2" version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] [[package]] name = "quickcheck" version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" dependencies = [ "env_logger", "log", "rand 0.8.5", ] [[package]] name = "quickcheck_macros" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f71ee38b42f8459a88d3362be6f9b841ad2d5421844f61eb1c59c11bff3ac14a" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "quote" version = "1.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" dependencies = [ "proc-macro2", ] [[package]] name = "r-efi" version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" [[package]] name = "rand" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "rand_core 0.6.4", ] [[package]] name = "rand" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha", "rand_core 0.9.3", ] [[package]] name = "rand_chacha" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", "rand_core 0.9.3", ] [[package]] name = "rand_core" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ "getrandom 0.2.16", ] [[package]] name = "rand_core" version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ "getrandom 0.3.4", ] [[package]] name = "regex" version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", "regex-automata", "regex-syntax", ] [[package]] name = "regex-automata" version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] [[package]] name = "regex-syntax" version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "syn" version = "2.0.110" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "unicode-ident" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] name = "utf8-chars" version = "3.0.6" dependencies = [ "arrayvec", "quickcheck", "quickcheck_macros", "rand 0.9.2", ] [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasip2" version = "1.0.1+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" dependencies = [ "wit-bindgen", ] [[package]] name = "wit-bindgen" version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" [[package]] name = "zerocopy" version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" dependencies = [ "proc-macro2", "quote", "syn", ] utf8-chars-3.0.6/Cargo.toml0000644000000025371046102023000110550ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.70" name = "utf8-chars" version = "3.0.6" authors = ["warlock "] build = false autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "Char-by-char iterator and `read_char` method for `BufRead`." documentation = "https://docs.rs/utf8-chars/" readme = "README.md" keywords = [ "utf-8", "utf8", "chars", "iterator", "read_char", ] categories = ["encoding"] license = "MIT OR Apache-2.0" repository = "https://github.com/A1-Triard/utf8-chars" [badges.maintenance] status = "passively-maintained" [features] bench = [] default = [] [lib] name = "utf8_chars" path = "src/lib.rs" [dependencies.arrayvec] version = "0.7.2" [dev-dependencies.quickcheck] version = "1.0.3" [dev-dependencies.quickcheck_macros] version = "1.0.0" [dev-dependencies.rand] version = "0.9.2" utf8-chars-3.0.6/Cargo.toml.orig000064400000000000000000000012321046102023000145030ustar 00000000000000[package] edition = "2021" name = "utf8-chars" version = "3.0.6" rust-version = "1.70" authors = ["warlock "] description = "Char-by-char iterator and `read_char` method for `BufRead`." readme = "README.md" documentation = "https://docs.rs/utf8-chars/" keywords = ["utf-8", "utf8", "chars", "iterator", "read_char"] categories = ["encoding"] license = "MIT OR Apache-2.0" repository = "https://github.com/A1-Triard/utf8-chars" [features] default = [] bench = [] [dependencies] arrayvec = "0.7.2" [dev-dependencies] quickcheck = "1.0.3" quickcheck_macros = "1.0.0" rand = "0.9.2" [badges] maintenance = { status = "passively-maintained" } utf8-chars-3.0.6/README.md000064400000000000000000000005541046102023000131010ustar 00000000000000![maintenance: passively maintained](https://img.shields.io/badge/maintenance-passively--maintained-yellowgreen.svg) # utf8-chars Char-by-char iterator and `read_char` method for `BufRead`. ```rust use std::io::stdin; use utf8_chars::BufReadCharsExt; fn main() { for c in stdin().lock().chars().map(|x| x.unwrap()) { println!("{}", c); } } ``` utf8-chars-3.0.6/src/lib.rs000064400000000000000000000435231046102023000135300ustar 00000000000000#![cfg_attr(feature="bench", feature(test))] #![deny(warnings)] #![allow(clippy::needless_doctest_main)] #![allow(clippy::needless_lifetimes)] #![doc(test(attr(deny(warnings))))] #![doc(test(attr(allow(dead_code))))] #![doc(test(attr(allow(unused_variables))))] #[cfg(all(feature="bench", test))] extern crate test; #[doc=include_str!("../README.md")] type _DocTestReadme = (); use std::fmt::{self}; use std::char::{self}; use std::error::{Error}; use std::io::{self, BufRead}; use arrayvec::{ArrayVec}; /// A structure, containing read bytes, and an [`io::Error`]. /// /// The `io::Error` is an actual I/O error if some occurred, /// or a synthetic error with either the [`UnexpectedEof`](std::io::ErrorKind::UnexpectedEof) /// kind if a multi-byte char was unexpectedly terminated, /// either the [`InvalidData`](std::io::ErrorKind::InvalidData) /// kind if no actual I/O error occurred, but read byte sequence was not recognised as a valid UTF-8. #[derive(Debug)] pub struct ReadCharError { bytes: ArrayVec, io_error: io::Error, } impl ReadCharError { /// A byte sequence, representing an invalid or incomplete UTF-8-encoded char. pub fn as_bytes(&self) -> &[u8] { &self.bytes } /// Returns a reference to the I/O error. pub fn as_io_error(&self) -> &io::Error { &self.io_error } /// Consumes the `ReadCharError`, returning the I/O error. pub fn into_io_error(self) -> io::Error { self.io_error } } impl Error for ReadCharError { fn source(&self) -> Option<&(dyn Error + 'static)> { Some(&self.io_error) } } impl From for io::Error { fn from(e: ReadCharError) -> io::Error { e.into_io_error() } } impl fmt::Display for ReadCharError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "invalid UTF-8 byte sequence")?; for b in self.as_bytes() { write!(f, " {b:02X}")?; } write!(f, " read")?; match self.as_io_error().kind() { io::ErrorKind::InvalidData => { }, io::ErrorKind::UnexpectedEof => { write!(f, " (unexpected EOF)")?; } _ => { write!(f, " ({})", self.as_io_error())?; } } Ok(()) } } /// An iterator over the chars of an instance of [`BufRead`]. /// /// In contrast to [`CharsRaw`], the error type is /// [`io::Error`], and therefore more likely to be drop-in /// compatible, at the price of losing the UTF-8 context bytes in the error /// message. /// /// This struct is generally created by calling /// [`chars`](BufReadCharsExt::chars) on a [`BufRead`]. #[derive(Debug)] pub struct Chars<'a, T: BufRead + ?Sized>(&'a mut T); impl<'a, T: BufRead + ?Sized> Iterator for Chars<'a, T> { type Item = io::Result; fn next(&mut self) -> Option { self.0.read_char_raw().map_err(|x| x.into_io_error()).transpose() } } /// An iterator over the chars of an instance of [`BufRead`]. /// /// This struct is generally created by calling [`chars_raw`](BufReadCharsExt::chars_raw) /// on a [`BufRead`]. #[derive(Debug)] pub struct CharsRaw<'a, T: BufRead + ?Sized>(&'a mut T); impl<'a, T: BufRead + ?Sized> Iterator for CharsRaw<'a, T> { type Item = Result; fn next(&mut self) -> Option { self.0.read_char_raw().transpose() } } const CHAR_MAX_LEN: u8 = 4; const LEAD_BYTE_MASK: [u8; CHAR_MAX_LEN as usize] = [0x7F, 0x1F, 0x0F, 0x07]; const TAIL_BYTE_MASK: u8 = 0x3F; const TAIL_BYTE_SIGNATURE: u8 = 0x80; const TAIL_BYTE_BITS_COUNT: u8 = 6; const CHAR_MIN_VALUE: [u32; CHAR_MAX_LEN as usize] = [0, 0x80, 0x800, 0x10000]; fn read_byte_and_ignore_interrupts(reader: &mut (impl BufRead + ?Sized)) -> io::Result> { loop { match reader.fill_buf() { Ok(buf) => return Ok(buf.first().copied()), Err(e) => { if e.kind() != io::ErrorKind::Interrupted { return Err(e) } } } }; } /// Extends [`BufRead`] with methods for reading chars. pub trait BufReadCharsExt : BufRead { /// Returns an iterator over the chars of this reader. /// /// In contrast to [`chars_raw`](BufReadCharsExt::chars_raw), the error type is /// [`io::Error`], and therefore more likely to be drop-in /// compatible, at the price of losing the UTF-8 context bytes in the error /// message. /// /// The iterator returned from this function will yield instances of /// [`io::Result`]``. fn chars(&mut self) -> Chars<'_, Self> { Chars(self) } /// Returns an iterator over the chars of this reader. /// /// The iterator returned from this function will yield instances of /// [`Result`]``. fn chars_raw(&mut self) -> CharsRaw<'_, Self> { CharsRaw(self) } /// Reads a char from the underlying reader. /// /// In contrast to [`read_char_raw`](BufReadCharsExt::read_char_raw), the error type is /// [`io::Error`], and therefore more likely to be drop-in /// compatible, at the price of losing the UTF-8 context bytes in the error /// message. /// /// Returns /// - `Ok(Some(char))` if a char has successfully read, /// - `Ok(None)` if the stream has reached EOF before any byte was read, /// - `Err(err)` if an I/O error occurred, or read byte sequence was not recognised as a valid UTF-8. /// /// If this function encounters an error of the kind /// [`io::ErrorKind::Interrupted`] /// then the error is ignored and the operation will continue. fn read_char(&mut self) -> io::Result> { self.read_char_raw().map_err(|x| x.into_io_error()) } /// Reads a char from the underlying reader. /// /// Returns /// - `Ok(Some(char))` if a char has successfully read, /// - `Ok(None)` if the stream has reached EOF before any byte was read, /// - `Err(err)` if an I/O error occurred, or read byte sequence was not recognised as a valid UTF-8. /// /// If this function encounters an error of the kind /// [`io::ErrorKind::Interrupted`] /// then the error is ignored and the operation will continue. fn read_char_raw(&mut self) -> Result, ReadCharError> { match read_byte_and_ignore_interrupts(self) { Err(e) => Err(ReadCharError { bytes: ArrayVec::new(), io_error: e }), Ok(None) => Ok(None), Ok(Some(lead_byte)) => { self.consume(1); let leading_ones = lead_byte.leading_ones(); if leading_ones == 0 { return Ok(Some(char::from(lead_byte))); } if leading_ones == 1 || leading_ones > 4 { let mut bytes = ArrayVec::new(); bytes.push(lead_byte); return Err(ReadCharError { bytes, io_error: io::Error::from(io::ErrorKind::InvalidData) }); } let mut bytes = ArrayVec::new(); bytes.push(lead_byte); let tail_bytes_count = (leading_ones - 1) as u8; let mut item = ((lead_byte & LEAD_BYTE_MASK[tail_bytes_count as usize]) as u32) << (TAIL_BYTE_BITS_COUNT * tail_bytes_count) ; for tail_byte_index in (0 .. tail_bytes_count).rev() { match read_byte_and_ignore_interrupts(self) { Err(e) => return Err(ReadCharError { bytes, io_error: e }), Ok(None) => return Err(ReadCharError { bytes, io_error: io::Error::from(io::ErrorKind::UnexpectedEof) }), Ok(Some(tail_byte)) => { if tail_byte & !TAIL_BYTE_MASK != TAIL_BYTE_SIGNATURE { return Err(ReadCharError { bytes, io_error: io::Error::from(io::ErrorKind::InvalidData) }); } bytes.push(tail_byte); item |= ((tail_byte & TAIL_BYTE_MASK) as u32) << (tail_byte_index * TAIL_BYTE_BITS_COUNT) ; self.consume(1); } } } if item < CHAR_MIN_VALUE[tail_bytes_count as usize] { return Err(ReadCharError { bytes, io_error: io::Error::from(io::ErrorKind::InvalidData) }); } match char::from_u32(item) { None => Err(ReadCharError { bytes, io_error: io::Error::from(io::ErrorKind::InvalidData) }), Some(item) => Ok(Some(item)) } } } } } impl BufReadCharsExt for T { } #[cfg(test)] mod tests { use quickcheck_macros::quickcheck; use std::io::{BufRead, BufReader, ErrorKind}; use crate::{BufReadCharsExt}; #[test] fn read_valid_unicode() { assert_eq!(vec!['A', 'B', 'c', 'd', ' ', 'А', 'Б', 'в', 'г', 'д', ' ', 'U', '\0'], BufReader::new("ABcd АБвгд U\0".as_bytes()).chars_raw().map(|x| x.unwrap()).collect::>()); } #[test] fn edgecase_one_two_bytes() { assert_eq!(vec!['\x7F'], BufReader::new(&[ 0x7F ][..]).chars_raw().map(|x| x.unwrap()).collect::>()); assert_eq!(vec!['\u{0080}'], BufReader::new(&[ 0xC2, 0x80 ][..]).chars_raw().map(|x| x.unwrap()).collect::>()); let mut bytes = BufReader::new(&[ 0xC2 ][..]); let res = bytes.chars_raw().collect::>(); assert_eq!(1, res.len()); let err = res[0].as_ref().err().unwrap(); assert_eq!(&[0xC2][..], err.as_bytes()); assert_eq!(ErrorKind::UnexpectedEof, err.as_io_error().kind()); let mut bytes = BufReader::new(&[ 0xC1, 0xBF ][..]); let res = bytes.chars_raw().collect::>(); assert_eq!(1, res.len()); let err = res[0].as_ref().err().unwrap(); assert_eq!(&[0xC1, 0xBF][..], err.as_bytes()); assert_eq!(ErrorKind::InvalidData, err.as_io_error().kind()); } #[test] fn edgecase_two_three_bytes() { assert_eq!(vec!['\u{07FF}'], BufReader::new(&[ 0xDF, 0xBF ][..]).chars_raw().map(|x| x.unwrap()).collect::>()); assert_eq!(vec!['\u{0800}'], BufReader::new(&[ 0xE0, 0xA0, 0x80 ][..]).chars_raw().map(|x| x.unwrap()).collect::>()); let mut bytes = BufReader::new(&[ 0xE0, 0xA0 ][..]); let res = bytes.chars_raw().collect::>(); assert_eq!(1, res.len()); let err = res[0].as_ref().err().unwrap(); assert_eq!(&[0xE0, 0xA0][..], err.as_bytes()); assert_eq!(ErrorKind::UnexpectedEof, err.as_io_error().kind()); let mut bytes = BufReader::new(&[ 0xE0, 0x9F, 0xBF ][..]); let res = bytes.chars_raw().collect::>(); assert_eq!(1, res.len()); let err = res[0].as_ref().err().unwrap(); assert_eq!(&[0xE0, 0x9F, 0xBF][..], err.as_bytes()); assert_eq!(ErrorKind::InvalidData, err.as_io_error().kind()); } #[test] fn edgecase_three_four_bytes() { assert_eq!(vec!['\u{00FFFF}'], BufReader::new(&[ 0xEF, 0xBF, 0xBF ][..]).chars_raw().map(|x| x.unwrap()).collect::>()); assert_eq!(vec!['\u{010000}'], BufReader::new(&[ 0xF0, 0x90, 0x80, 0x80 ][..]).chars_raw().map(|x| x.unwrap()).collect::>()); let mut bytes = BufReader::new(&[ 0xF0, 0x90, 0x80 ][..]); let res = bytes.chars_raw().collect::>(); assert_eq!(1, res.len()); let err = res[0].as_ref().err().unwrap(); assert_eq!(&[0xF0, 0x90, 0x80][..], err.as_bytes()); assert_eq!(ErrorKind::UnexpectedEof, err.as_io_error().kind()); let mut bytes = BufReader::new(&[ 0xF0, 0x8F, 0xBF, 0xBF ][..]); let res = bytes.chars_raw().collect::>(); assert_eq!(1, res.len()); let err = res[0].as_ref().err().unwrap(); assert_eq!(&[0xF0, 0x8F, 0xBF, 0xBF][..], err.as_bytes()); assert_eq!(ErrorKind::InvalidData, err.as_io_error().kind()); } #[test] fn edgecase_four_bytes_max() { assert_eq!(vec!['\u{10FFFF}'], BufReader::new(&[ 0xF4, 0x8F, 0xBF, 0xBF ][..]).chars_raw().map(|x| x.unwrap()).collect::>()); // BufReader::new(&[ 0xF7, 0xBF, 0xBF, 0xBF ][..]).chars_raw().map(|x| x.unwrap()).collect::>()); let mut bytes = BufReader::new(&[ 0xF8, 0x41 ][..]); let res = bytes.chars_raw().collect::>(); assert_eq!(2, res.len()); let err = res[0].as_ref().err().unwrap(); assert_eq!(&[0xF8][..], err.as_bytes()); assert_eq!(ErrorKind::InvalidData, err.as_io_error().kind()); let normal_char = res[1].as_ref().unwrap(); assert_eq!(&'A', normal_char); // Now we want to force `read_char` to make this call: assert_eq!(None, std::char::from_u32(0x00110000)); // Sadly, there is no more specific way to test this. let mut bytes = BufReader::new(&[ 0xF4, 0x90, 0x80, 0x80 ][..]); let res = bytes.chars_raw().collect::>(); assert_eq!(1, res.len()); let err = res[0].as_ref().err().unwrap(); assert_eq!(&[0xF4, 0x90, 0x80, 0x80][..], err.as_bytes()); assert_eq!(ErrorKind::InvalidData, err.as_io_error().kind()); } #[test] fn read_io_valid_unicode() { assert_eq!(vec!['A', 'B', 'c', 'd', ' ', 'А', 'Б', 'в', 'г', 'д', ' ', 'U', '\0'], BufReader::new("ABcd АБвгд U\0".as_bytes()).chars().map(|x| x.unwrap()).collect::>()); } #[test] fn read_valid_unicode_from_dyn_read() { let bytes: &mut dyn BufRead = &mut BufReader::new("ABcd АБвгд UV".as_bytes()); assert_eq!( vec!['A', 'B', 'c', 'd', ' ', 'А', 'Б', 'в', 'г', 'д', ' ', 'U', 'V'], bytes.chars_raw().map(|x| x.unwrap()).collect::>() ); } #[test] fn do_not_take_extra_bytes() { let mut bytes = BufReader::new("ABcd АБвгд UV".as_bytes()); assert_eq!(vec!['A', 'B', 'c', 'd'], bytes.chars_raw().take(4).map(|x| x.unwrap()).collect::>()); assert_eq!(vec![' ', 'А', 'Б', 'в', 'г', 'д', ' ', 'U', 'V'], bytes.chars_raw().map(|x| x.unwrap()).collect::>()); } #[test] fn read_value_out_of_range() { let mut bytes = BufReader::new(&[ 0xF5, 0x8F, 0xBF, 0xBF ][..]); let res = bytes.chars_raw().collect::>(); assert_eq!(1, res.len()); let err = res[0].as_ref().err().unwrap(); assert_eq!(&[0xF5, 0x8F, 0xBF, 0xBF][..], err.as_bytes()); } #[test] fn read_io_value_out_of_range() { let mut bytes = BufReader::new(&[ 0xF5, 0x8F, 0xBF, 0xBF ][..]); let res = bytes.chars().collect::>(); assert_eq!(1, res.len()); let err = res[0].as_ref().err().unwrap(); assert_eq!(ErrorKind::InvalidData, err.kind()); } #[test] fn read_io_incomplete_twobyte() { let mut bytes = BufReader::new(&[ 0xC3 ][..]); // 0xC3 0xA4 = 'ä' let res = bytes.chars().collect::>(); assert_eq!(1, res.len()); let err = res[0].as_ref().err().unwrap(); assert_eq!(ErrorKind::UnexpectedEof, err.kind()); } #[test] fn read_io_incomplete_threebyte() { let mut bytes = BufReader::new(&[ 0xE1, 0xBA ][..]); // 0xE1 0xBA 0xB9 = 'ẹ' let res = bytes.chars().collect::>(); assert_eq!(1, res.len()); let err = res[0].as_ref().err().unwrap(); assert_eq!(ErrorKind::UnexpectedEof, err.kind()); } #[test] fn read_surrogate() { let mut bytes = BufReader::new(&[ 0xED, 0xA0, 0x80 ][..]); let res = bytes.chars_raw().collect::>(); assert_eq!(1, res.len()); let err = res[0].as_ref().err().unwrap(); assert_eq!(&[0xED, 0xA0, 0x80][..], err.as_bytes()); } #[test] fn read_invalid_sequences() { let mut bytes = BufReader::new(&[ 0x81, 0x82, 0xC1, 0x07, 0xC1, 0x87, 0xC2, 0xC2, 0x82, 0xF7, 0x88, 0x89, 0x07 ][..]); let res = bytes.chars_raw().collect::>(); assert_eq!(9, res.len()); assert_eq!(&[0x81][..], res[0].as_ref().err().unwrap().as_bytes()); assert_eq!(&[0x82][..], res[1].as_ref().err().unwrap().as_bytes()); assert_eq!(&[0xC1][..], res[2].as_ref().err().unwrap().as_bytes()); assert_eq!('\x07', *res[3].as_ref().unwrap()); assert_eq!(&[0xC1, 0x87][..], res[4].as_ref().err().unwrap().as_bytes()); assert_eq!(&[0xC2][..], res[5].as_ref().err().unwrap().as_bytes()); assert_eq!('\u{82}', *res[6].as_ref().unwrap()); assert_eq!(&[0xF7, 0x88, 0x89][..], res[7].as_ref().err().unwrap().as_bytes()); assert_eq!('\x07', *res[8].as_ref().unwrap()); } #[quickcheck] fn read_string(s: String) -> bool { let mut t = String::new(); BufReader::new(s.as_bytes()).chars_raw().for_each(|c| t.push(c.unwrap())); s == t } #[quickcheck] fn read_array(b: Vec) -> bool { let mut t = Vec::new(); BufReader::new(&b[..]).chars_raw().for_each(|c| t.append(&mut c.map_or_else(|e| e.as_bytes().to_vec(), |s| s.to_string().as_bytes().to_vec())) ); b == t } } #[cfg(all(feature="bench", test))] mod benchs { use rand::distributions::{Distribution, Uniform}; use rand::thread_rng; use std::hint::black_box; use std::io::BufReader; use std::vec::{Vec}; use test::Bencher; use crate::{BufReadCharsExt}; #[bench] fn read_array_bench(b: &mut Bencher) { let mut rng = thread_rng(); let mut bytes: Vec = Uniform::new_inclusive(0u8, 255u8).sample_iter(&mut rng).take(10000).collect(); b.iter(move || { black_box(&mut bytes); black_box(BufReader::new(&bytes[..]).chars_raw().last()); }); } }