blob: 68e5b7e14cf5fce4a896e0bcd4b9083dbf15a10b [file] [log] [blame]
Matthew Maurer19837bd2020-06-02 11:15:36 -07001// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! Determine displayed width of `char` and `str` types according to
12//! [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
13//! rules.
14//!
15//! ```rust
16//! extern crate unicode_width;
17//!
18//! use unicode_width::UnicodeWidthStr;
19//!
20//! fn main() {
21//! let teststr = "Hello, world!";
22//! let width = UnicodeWidthStr::width(teststr);
23//! println!("{}", teststr);
24//! println!("The above string is {} columns wide.", width);
25//! let width = teststr.width_cjk();
26//! println!("The above string is {} columns wide (CJK).", width);
27//! }
28//! ```
29//!
30//! # features
31//!
32//! unicode-width supports a `no_std` feature. This eliminates dependence
33//! on std, and instead uses equivalent functions from core.
34//!
35//! # crates.io
36//!
37//! You can use this package in your project by adding the following
38//! to your `Cargo.toml`:
39//!
40//! ```toml
41//! [dependencies]
42//! unicode-width = "0.1.5"
43//! ```
44
45#![deny(missing_docs, unsafe_code)]
46#![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
47 html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")]
48
49#![cfg_attr(feature = "bench", feature(test))]
50#![no_std]
51
Matthew Maurer5b9e9402020-07-06 13:34:49 -070052// ANDROID: Unconditionally use std to allow building as a dylib.
Matthew Maurer19837bd2020-06-02 11:15:36 -070053#[macro_use]
54extern crate std;
55
56#[cfg(feature = "bench")]
57extern crate test;
58
59use tables::charwidth as cw;
60pub use tables::UNICODE_VERSION;
61
62use core::ops::Add;
63
64mod tables;
65
66#[cfg(test)]
67mod tests;
68
/// Column-width queries for a single Unicode character.
pub trait UnicodeWidthChar {
    /// Reports how many columns the character occupies when displayed.
    ///
    /// `None` is returned for control characters, with the exception of
    /// `'\x00'`.
    ///
    /// Characters in the Ambiguous category of
    /// [Unicode Standard Annex #11](https://www.unicode.org/reports/tr11/)
    /// count as 1 column here, which is the recommended treatment for
    /// non-CJK contexts and for contexts that cannot be reliably determined.
    fn width(self) -> Option<usize>;

    /// Reports how many columns the character occupies when displayed in a
    /// CJK context.
    ///
    /// `None` is returned for control characters, with the exception of
    /// `'\x00'`.
    ///
    /// Characters in the Ambiguous category of
    /// [Unicode Standard Annex #11](https://www.unicode.org/reports/tr11/)
    /// count as 2 columns here, which is the recommended treatment for CJK
    /// contexts.
    fn width_cjk(self) -> Option<usize>;
}
89
90impl UnicodeWidthChar for char {
91 #[inline]
92 fn width(self) -> Option<usize> { cw::width(self, false) }
93
94 #[inline]
95 fn width_cjk(self) -> Option<usize> { cw::width(self, true) }
96}
97
/// Methods for determining displayed width of Unicode strings.
pub trait UnicodeWidthStr {
    /// Returns the string's displayed width in columns.
    ///
    /// Control characters are treated as having zero width.
    ///
    /// This function treats characters in the Ambiguous category according
    /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// as 1 column wide. This is consistent with the recommendations for
    /// non-CJK contexts, or when the context cannot be reliably determined.
    // The borrow's lifetime is elided: it is never named in the return type,
    // and the `str` implementation in this file already elides it.
    fn width(&self) -> usize;

    /// Returns the string's displayed width in columns.
    ///
    /// Control characters are treated as having zero width.
    ///
    /// This function treats characters in the Ambiguous category according
    /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// as 2 columns wide. This is consistent with the recommendations for
    /// CJK contexts.
    fn width_cjk(&self) -> usize;
}
120
121impl UnicodeWidthStr for str {
122 #[inline]
123 fn width(&self) -> usize {
124 self.chars().map(|c| cw::width(c, false).unwrap_or(0)).fold(0, Add::add)
125 }
126
127 #[inline]
128 fn width_cjk(&self) -> usize {
129 self.chars().map(|c| cw::width(c, true).unwrap_or(0)).fold(0, Add::add)
130 }
131}