Matthew Maurer | 19837bd | 2020-06-02 11:15:36 -0700 | [diff] [blame] | 1 | // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT |
| 2 | // file at the top-level directory of this distribution and at |
| 3 | // http://rust-lang.org/COPYRIGHT. |
| 4 | // |
| 5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| 8 | // option. This file may not be copied, modified, or distributed |
| 9 | // except according to those terms. |
| 10 | |
| 11 | //! Determine displayed width of `char` and `str` types according to |
| 12 | //! [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) |
| 13 | //! rules. |
| 14 | //! |
| 15 | //! ```rust |
| 16 | //! extern crate unicode_width; |
| 17 | //! |
| 18 | //! use unicode_width::UnicodeWidthStr; |
| 19 | //! |
| 20 | //! fn main() { |
| 21 | //! let teststr = "Hello, world!"; |
| 22 | //! let width = UnicodeWidthStr::width(teststr); |
| 23 | //! println!("{}", teststr); |
| 24 | //! println!("The above string is {} columns wide.", width); |
| 25 | //! let width = teststr.width_cjk(); |
| 26 | //! println!("The above string is {} columns wide (CJK).", width); |
| 27 | //! } |
| 28 | //! ``` |
| 29 | //! |
| 30 | //! # features |
| 31 | //! |
| 32 | //! unicode-width supports a `no_std` feature. This eliminates dependence |
| 33 | //! on std, and instead uses equivalent functions from core. |
| 34 | //! |
| 35 | //! # crates.io |
| 36 | //! |
| 37 | //! You can use this package in your project by adding the following |
| 38 | //! to your `Cargo.toml`: |
| 39 | //! |
| 40 | //! ```toml |
| 41 | //! [dependencies] |
| 42 | //! unicode-width = "0.1.5" |
| 43 | //! ``` |
| 44 | |
| 45 | #![deny(missing_docs, unsafe_code)] |
| 46 | #![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png", |
| 47 | html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")] |
| 48 | |
| 49 | #![cfg_attr(feature = "bench", feature(test))] |
| 50 | #![no_std] |
| 51 | |
Matthew Maurer | 5b9e940 | 2020-07-06 13:34:49 -0700 | [diff] [blame] | 52 | // ANDROID: Unconditionally use std to allow building as a dylib. |
Matthew Maurer | 19837bd | 2020-06-02 11:15:36 -0700 | [diff] [blame] | 53 | #[macro_use] |
| 54 | extern crate std; |
| 55 | |
| 56 | #[cfg(feature = "bench")] |
| 57 | extern crate test; |
| 58 | |
| 59 | use tables::charwidth as cw; |
| 60 | pub use tables::UNICODE_VERSION; |
| 61 | |
| 62 | use core::ops::Add; |
| 63 | |
| 64 | mod tables; |
| 65 | |
| 66 | #[cfg(test)] |
| 67 | mod tests; |
| 68 | |
/// Display-width queries for a single Unicode character.
pub trait UnicodeWidthChar {
    /// Reports how many columns the character occupies when displayed.
    ///
    /// Yields `None` when the character is a control character other than
    /// `'\x00'`.
    ///
    /// Characters classified as Ambiguous by
    /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// count as 1 column. That is the recommended treatment for non-CJK
    /// contexts, and for contexts that cannot be reliably determined.
    fn width(self) -> Option<usize>;

    /// Reports how many columns the character occupies when displayed in a
    /// CJK context.
    ///
    /// Yields `None` when the character is a control character other than
    /// `'\x00'`.
    ///
    /// Characters classified as Ambiguous by
    /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// count as 2 columns, which is the recommended treatment for CJK
    /// contexts.
    fn width_cjk(self) -> Option<usize>;
}
| 89 | |
| 90 | impl UnicodeWidthChar for char { |
| 91 | #[inline] |
| 92 | fn width(self) -> Option<usize> { cw::width(self, false) } |
| 93 | |
| 94 | #[inline] |
| 95 | fn width_cjk(self) -> Option<usize> { cw::width(self, true) } |
| 96 | } |
| 97 | |
/// Methods for determining displayed width of Unicode strings.
pub trait UnicodeWidthStr {
    /// Returns the string's displayed width in columns.
    ///
    /// Control characters are treated as having zero width.
    ///
    /// This function treats characters in the Ambiguous category according
    /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// as 1 column wide. This is consistent with the recommendations for
    /// non-CJK contexts, or when the context cannot be reliably determined.
    // The receiver lifetime is elided: it is never referenced by the return
    // type, so naming it adds nothing.
    fn width(&self) -> usize;

    /// Returns the string's displayed width in columns.
    ///
    /// Control characters are treated as having zero width.
    ///
    /// This function treats characters in the Ambiguous category according
    /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// as 2 columns wide. This is consistent with the recommendations for
    /// CJK contexts.
    fn width_cjk(&self) -> usize;
}
| 120 | |
| 121 | impl UnicodeWidthStr for str { |
| 122 | #[inline] |
| 123 | fn width(&self) -> usize { |
| 124 | self.chars().map(|c| cw::width(c, false).unwrap_or(0)).fold(0, Add::add) |
| 125 | } |
| 126 | |
| 127 | #[inline] |
| 128 | fn width_cjk(&self) -> usize { |
| 129 | self.chars().map(|c| cw::width(c, true).unwrap_or(0)).fold(0, Add::add) |
| 130 | } |
| 131 | } |