license.bot | f003cfe | 2008-08-24 09:55:55 +0900 | [diff] [blame^] | 1 | // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 4 | // |
| 5 | // A wrapper around Uniscribe that provides a reasonable API. |
| 6 | |
| 7 | #ifndef BASE_GFX_UNISCRIBE_H__ |
| 8 | #define BASE_GFX_UNISCRIBE_H__ |
| 9 | |
| 10 | #include <windows.h> |
| 11 | #include <usp10.h> |
| 12 | #include <wchar.h> |
| 13 | #include <map> |
| 14 | #include <vector> |
| 15 | |
| 16 | #include "base/stack_container.h" |
| 17 | #include "testing/gtest/include/gtest/gtest_prod.h" |
| 18 | |
| 19 | namespace gfx { |
| 20 | |
// Sizes used as the second template argument of the StackVector members of
// UniscribeState below. UNISCRIBE_STATE_STACK_RUNS is used for the per-run
// arrays (runs_, shapes_, screen_order_); UNISCRIBE_STATE_STACK_CHARS is used
// for the per-glyph / per-character arrays inside Shaping.
// NOTE(review): presumably this is the number of elements StackVector keeps
// inline before falling back to the heap -- confirm against
// base/stack_container.h.
| 21 | #define UNISCRIBE_STATE_STACK_RUNS 8 |
| 22 | #define UNISCRIBE_STATE_STACK_CHARS 32 |
| 23 | |
| 24 | // This object should be safe to create & destroy frequently, as long as the |
| 25 | // caller preserves the script_cache when possible (this data may be slow to |
| 26 | // compute). |
| 27 | // |
| 28 | // This object is "kind of large" (~1K) because it reserves a lot of space for |
| 29 | // working with to avoid expensive heap operations. Therefore, not only should |
| 30 | // you not worry about creating and destroying it, you should try to not keep |
| 31 | // them around. |
| 32 | class UniscribeState { |
| 33 | public: |
| 34 | // Initializes this Uniscribe run with the text pointed to by |run| with |
| 35 | // |length|. The input is NOT null terminated. |
| 36 | // |
| 37 | // The is_rtl flag should be set if the input script is RTL. It is assumed |
| 38 | // that the caller has already divided up the input text (using ICU, for |
| 39 | // example) into runs of the same direction of script. This avoids |
| 40 | // disagreements between the caller and Uniscribe later (see FillItems). |
| 41 | // |
| 42 | // A script cache should be provided by the caller that is initialized to |
| 43 | // NULL. When the caller is done with the cache (it may be stored between |
| 44 | // runs as long as it is used consistently with the same HFONT), it should |
| 45 | // call ScriptFreeCache(). |
| 46 | UniscribeState(const wchar_t* input, |
| 47 | int input_length, |
| 48 | bool is_rtl, |
| 49 | HFONT hfont, |
| 50 | SCRIPT_CACHE* script_cache, |
| 51 | SCRIPT_FONTPROPERTIES* font_properties); |
| 52 | |
| 53 | virtual ~UniscribeState(); |
| 54 | |
| 55 | // Sets Uniscribe's directional override flag. False by default. |
| 56 | bool directional_override() const { |
| 57 | return directional_override_; |
| 58 | } |
| 59 | void set_directional_override(bool override) { |
| 60 | directional_override_ = override; |
| 61 | } |
| 62 | |
| 63 | // Set's Uniscribe's no-ligate override flag. False by default. |
| 64 | bool inhibit_ligate() const { |
| 65 | return inhibit_ligate_; |
| 66 | } |
| 67 | void set_inhibit_ligate(bool inhibit) { |
| 68 | inhibit_ligate_ = inhibit; |
| 69 | } |
| 70 | |
| 71 | // Set letter spacing. We will try to insert this much space between |
| 72 | // graphemes (one or more glyphs perceived as a single unit by ordinary users |
| 73 | // of a script). Positive values increase letter spacing, negative values |
| 74 | // decrease it. 0 by default. |
| 75 | int letter_spacing() const { |
| 76 | return letter_spacing_; |
| 77 | } |
| 78 | void set_letter_spacing(int letter_spacing) { |
| 79 | letter_spacing_ = letter_spacing; |
| 80 | } |
| 81 | |
| 82 | // Set the width of a standard space character. We use this to normalize |
| 83 | // space widths. Windows will make spaces after Hindi characters larger than |
| 84 | // other spaces. A space_width of 0 means to use the default space width. |
| 85 | // |
| 86 | // Must be set before Init() is called. |
| 87 | int space_width() const { |
| 88 | return space_width_; |
| 89 | } |
| 90 | void set_space_width(int space_width) { |
| 91 | space_width_ = space_width; |
| 92 | } |
| 93 | |
| 94 | // Set word spacing. We will try to insert this much extra space between |
| 95 | // each word in the input (beyond whatever whitespace character separates |
| 96 | // words). Positive values lead to increased letter spacing, negative values |
| 97 | // decrease it. 0 by default. |
| 98 | // |
| 99 | // Must be set before Init() is called. |
| 100 | int word_spacing() const { |
| 101 | return word_spacing_; |
| 102 | } |
| 103 | void set_word_spacing(int word_spacing) { |
| 104 | word_spacing_ = word_spacing; |
| 105 | } |
| 106 | void set_ascent(int ascent) { |
| 107 | ascent_ = ascent; |
| 108 | } |
| 109 | |
| 110 | // You must call this after setting any options but before doing any |
| 111 | // other calls like asking for widths or drawing. |
| 112 | void Init() { InitWithOptionalLengthProtection(true); } |
| 113 | |
| 114 | // Returns the total width in pixels of the text run. |
| 115 | int Width() const; |
| 116 | |
| 117 | // Call to justify the text, with the amount of space that should be ADDED to |
| 118 | // get the desired width that the column should be justified to. Normally, |
| 119 | // spaces are inserted, but for Arabic there will be kashidas (extra strokes) |
| 120 | // inserted instead. |
| 121 | // |
| 122 | // This function MUST be called AFTER Init(). |
| 123 | void Justify(int additional_space); |
| 124 | |
| 125 | // Computes the given character offset into a pixel offset of the beginning |
| 126 | // of that character. |
| 127 | int CharacterToX(int offset) const; |
| 128 | |
| 129 | // Converts the given pixel X position into a logical character offset into |
| 130 | // the run. For positions appearing before the first character, this will |
| 131 | // return -1. |
| 132 | int XToCharacter(int x) const; |
| 133 | |
| 134 | // Draws the given characters to (x, y) in the given DC. The font will be |
| 135 | // handled by this function, but the font color and other attributes should |
| 136 | // be pre-set. |
| 137 | // |
| 138 | // The y position is the upper left corner, NOT the baseline. |
| 139 | void Draw(HDC dc, int x, int y, int from, int to); |
| 140 | |
| 141 | // Returns the first glyph assigned to the character at the given offset. |
| 142 | // This function is used to retrieve glyph information when Uniscribe is |
| 143 | // being used to generate glyphs for non-complex, non-BMP (above U+FFFF) |
| 144 | // characters. These characters are not otherwise special and have no |
| 145 | // complex shaping rules, so we don't otherwise need Uniscribe, except |
| 146 | // Uniscribe is the only way to get glyphs for non-BMP characters. |
| 147 | // |
| 148 | // Returns 0 if there is no glyph for the given character. |
| 149 | WORD FirstGlyphForCharacter(int char_offset) const; |
| 150 | |
| 151 | protected: |
| 152 | // Backend for init. The flag allows the unit test to specify whether we |
| 153 | // should fail early for very long strings like normal, or try to pass the |
| 154 | // long string to Uniscribe. The latter provides a way to force failure of |
| 155 | // shaping. |
| 156 | void InitWithOptionalLengthProtection(bool length_protection); |
| 157 | |
| 158 | // Tries to preload the font when the it is not accessible. |
| 159 | // This is the default implementation and it does not do anything. |
| 160 | virtual void TryToPreloadFont(HFONT font) {} |
| 161 | |
| 162 | private: |
| 163 | FRIEND_TEST(UniscribeTest, TooBig); |
| 164 | |
| 165 | // An array corresponding to each item in runs_ containing information |
| 166 | // on each of the glyphs that were generated. Like runs_, this is in |
| 167 | // reading order. However, for rtl text, the characters within each |
| 168 | // item will be reversed. |
| 169 | struct Shaping { |
| 170 | Shaping() |
| 171 | : pre_padding(0), |
| 172 | hfont_(NULL), |
| 173 | script_cache_(NULL), |
| 174 | ascent_offset_(0) { |
| 175 | abc.abcA = 0; |
| 176 | abc.abcB = 0; |
| 177 | abc.abcC = 0; |
| 178 | } |
| 179 | |
| 180 | // Returns the number of glyphs (which will be drawn to the screen) |
| 181 | // in this run. |
| 182 | int glyph_length() const { |
| 183 | return static_cast<int>(glyphs->size()); |
| 184 | } |
| 185 | |
| 186 | // Returns the number of characters (that we started with) in this run. |
| 187 | int char_length() const { |
| 188 | return static_cast<int>(logs->size()); |
| 189 | } |
| 190 | |
| 191 | // Returns the advance array that should be used when measuring glyphs. |
| 192 | // The returned pointer will indicate an array with glyph_length() elements |
| 193 | // and the advance that should be used for each one. This is either the |
| 194 | // real advance, or the justified advances if there is one, and is the |
| 195 | // array we want to use for measurement. |
| 196 | const int* effective_advances() const { |
| 197 | if (advance->empty()) |
| 198 | return 0; |
| 199 | if (justify->empty()) |
| 200 | return &advance[0]; |
| 201 | return &justify[0]; |
| 202 | } |
| 203 | |
| 204 | // This is the advance amount of space that we have added to the beginning |
| 205 | // of the run. It is like the ABC's |A| advance but one that we create and |
| 206 | // must handle internally whenever computing with pixel offsets. |
| 207 | int pre_padding; |
| 208 | |
| 209 | // Glyph indices in the font used to display this item. These indices |
| 210 | // are in screen order. |
| 211 | StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> glyphs; |
| 212 | |
| 213 | // For each input character, this tells us the first glyph index it |
| 214 | // generated. This is the only array with size of the input chars. |
| 215 | // |
| 216 | // All offsets are from the beginning of this run. Multiple characters can |
| 217 | // generate one glyph, in which case there will be adjacent duplicates in |
| 218 | // this list. One character can also generate multiple glyphs, in which |
| 219 | // case there will be skipped indices in this list. |
| 220 | StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> logs; |
| 221 | |
| 222 | // Flags and such for each glyph. |
| 223 | StackVector<SCRIPT_VISATTR, UNISCRIBE_STATE_STACK_CHARS> visattr; |
| 224 | |
| 225 | // Horizontal advances for each glyph listed above, this is basically |
| 226 | // how wide each glyph is. |
| 227 | StackVector<int, UNISCRIBE_STATE_STACK_CHARS> advance; |
| 228 | |
| 229 | // This contains glyph offsets, from the nominal position of a glyph. It |
| 230 | // is used to adjust the positions of multiple combining characters |
| 231 | // around/above/below base characters in a context-sensitive manner so |
| 232 | // that they don't bump against each other and the base character. |
| 233 | StackVector<GOFFSET, UNISCRIBE_STATE_STACK_CHARS> offsets; |
| 234 | |
| 235 | // Filled by a call to Justify, this is empty for nonjustified text. |
| 236 | // If nonempty, this contains the array of justify characters for each |
| 237 | // character as returned by ScriptJustify. |
| 238 | // |
| 239 | // This is the same as the advance array, but with extra space added for |
| 240 | // some characters. The difference between a glyph's |justify| width and |
| 241 | // it's |advance| width is the extra space added. |
| 242 | StackVector<int, UNISCRIBE_STATE_STACK_CHARS> justify; |
| 243 | |
| 244 | // Sizing information for this run. This treats the entire run as a |
| 245 | // character with a preceeding advance, width, and ending advance. |
| 246 | // The B width is the sum of the |advance| array, and the A and C widths |
| 247 | // are any extra spacing applied to each end. |
| 248 | // |
| 249 | // It is unclear from the documentation what this actually means. From |
| 250 | // experimentation, it seems that the sum of the character advances is |
| 251 | // always the sum of the ABC values, and I'm not sure what you're supposed |
| 252 | // to do with the ABC values. |
| 253 | ABC abc; |
| 254 | |
| 255 | // Pointers to windows font data used to render this run. |
| 256 | HFONT hfont_; |
| 257 | SCRIPT_CACHE* script_cache_; |
| 258 | |
| 259 | // Ascent offset between the ascent of the primary font |
| 260 | // and that of the fallback font. The offset needs to be applied, |
| 261 | // when drawing a string, to align multiple runs rendered with |
| 262 | // different fonts. |
| 263 | int ascent_offset_; |
| 264 | }; |
| 265 | |
| 266 | // Computes the runs_ array from the text run. |
| 267 | void FillRuns(); |
| 268 | |
| 269 | // Computes the shapes_ array given an runs_ array already filled in. |
| 270 | void FillShapes(); |
| 271 | |
| 272 | // Fills in the screen_order_ array (see below). |
| 273 | void FillScreenOrder(); |
| 274 | |
| 275 | // Called to update the glyph positions based on the current spacing options |
| 276 | // that are set. |
| 277 | void ApplySpacing(); |
| 278 | |
| 279 | // Normalizes all advances for spaces to the same width. This keeps windows |
| 280 | // from making spaces after Hindi characters larger, which is then |
| 281 | // inconsistent with our meaure of the width since WebKit doesn't include |
| 282 | // spaces in text-runs sent to uniscribe unless white-space:pre. |
| 283 | void AdjustSpaceAdvances(); |
| 284 | |
| 285 | // Returns the total width of a single item. |
| 286 | int AdvanceForItem(int item_index) const; |
| 287 | |
| 288 | // Shapes a run (pointed to by |input|) using |hfont| first. |
| 289 | // Tries a series of fonts specified retrieved with NextWinFontData |
| 290 | // and finally a font covering characters in |*input|. A string pointed |
| 291 | // by |input| comes from ScriptItemize and is supposed to contain |
| 292 | // characters belonging to a single script aside from characters |
| 293 | // common to all scripts (e.g. space). |
| 294 | bool Shape(const wchar_t* input, |
| 295 | int item_length, |
| 296 | int num_glyphs, |
| 297 | SCRIPT_ITEM& run, |
| 298 | Shaping& shaping); |
| 299 | |
| 300 | // Gets Windows font data for the next best font to try in the list |
| 301 | // of fonts. When there's no more font available, returns false |
| 302 | // without touching any of out params. Need to call ResetFontIndex |
| 303 | // to start scanning of the font list from the beginning. |
| 304 | virtual bool NextWinFontData(HFONT* hfont, |
| 305 | SCRIPT_CACHE** script_cache, |
| 306 | SCRIPT_FONTPROPERTIES** font_properties, |
| 307 | int* ascent) { |
| 308 | return false; |
| 309 | } |
| 310 | |
| 311 | // Resets the font index to the first in the list of fonts |
| 312 | // to try after the primaryFont turns out not to work. With font_index |
| 313 | // reset, NextWinFontData scans fallback fonts from the beginning. |
| 314 | virtual void ResetFontIndex() {} |
| 315 | |
| 316 | // The input data for this run of Uniscribe. See the constructor. |
| 317 | const wchar_t* input_; |
| 318 | const int input_length_; |
| 319 | const bool is_rtl_; |
| 320 | |
| 321 | // Windows font data for the primary font : |
| 322 | // In a sense, logfont_ and style_ are redundant because |
| 323 | // hfont_ contains all the information. However, invoking GetObject, |
| 324 | // everytime we need the height and the style, is rather expensive so |
| 325 | // that we cache them. Would it be better to add getter and (virtual) |
| 326 | // setter for the height and the style of the primary font, instead of |
| 327 | // logfont_? Then, a derived class ctor can set ascent_, height_ and style_ |
| 328 | // if they're known. Getters for them would have to 'infer' their values from |
| 329 | // hfont_ ONLY when they're not set. |
| 330 | HFONT hfont_; |
| 331 | SCRIPT_CACHE* script_cache_; |
| 332 | SCRIPT_FONTPROPERTIES* font_properties_; |
| 333 | int ascent_; |
| 334 | LOGFONT logfont_; |
| 335 | int style_; |
| 336 | |
| 337 | // Options, see the getters/setters above. |
| 338 | bool directional_override_; |
| 339 | bool inhibit_ligate_; |
| 340 | int letter_spacing_; |
| 341 | int space_width_; |
| 342 | int word_spacing_; |
| 343 | int justification_width_; |
| 344 | |
| 345 | // Uniscribe breaks the text into Runs. These are one length of text that is |
| 346 | // in one script and one direction. This array is in reading order. |
| 347 | StackVector<SCRIPT_ITEM, UNISCRIBE_STATE_STACK_RUNS> runs_; |
| 348 | |
| 349 | StackVector<Shaping, UNISCRIBE_STATE_STACK_RUNS> shapes_; |
| 350 | |
| 351 | // This is a mapping between reading order and screen order for the items. |
| 352 | // Uniscribe's items array are in reading order. For right-to-left text, |
| 353 | // or mixed (although WebKit's |TextRun| should really be only one |
| 354 | // direction), this makes it very difficult to compute character offsets |
| 355 | // and positions. This list is in screen order from left to right, and |
| 356 | // gives the index into the |runs_| and |shapes_| arrays of each |
| 357 | // subsequent item. |
| 358 | StackVector<int, UNISCRIBE_STATE_STACK_RUNS> screen_order_; |
| 359 | |
| 360 | DISALLOW_EVIL_CONSTRUCTORS(UniscribeState); |
| 361 | }; |
| 362 | |
| 363 | } // namespace gfx |
| 364 | |
| 365 | #endif // BASE_GFX_UNISCRIBE_H__ |
license.bot | f003cfe | 2008-08-24 09:55:55 +0900 | [diff] [blame^] | 366 | |