blob: 162e577eaa2ce5e9f2b26adab5b86b1738c2949f [file] [log] [blame]
// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// A wrapper around Uniscribe that provides a reasonable API.
6
7#ifndef BASE_GFX_UNISCRIBE_H__
8#define BASE_GFX_UNISCRIBE_H__
9
10#include <windows.h>
11#include <usp10.h>
12#include <wchar.h>
13#include <map>
14#include <vector>
15
16#include "base/stack_container.h"
17#include "testing/gtest/include/gtest/gtest_prod.h"
18
19namespace gfx {
20
// Capacity argument passed to the per-item StackVector members of
// UniscribeState (runs_, shapes_ and screen_order_).
#define UNISCRIBE_STATE_STACK_RUNS 8

// Capacity argument passed to the per-character / per-glyph StackVector
// members of UniscribeState::Shaping (glyphs, logs, visattr, advance, offsets
// and justify).
#define UNISCRIBE_STATE_STACK_CHARS 32
23
24// This object should be safe to create & destroy frequently, as long as the
25// caller preserves the script_cache when possible (this data may be slow to
26// compute).
27//
28// This object is "kind of large" (~1K) because it reserves a lot of space for
29// working with to avoid expensive heap operations. Therefore, not only should
30// you not worry about creating and destroying it, you should try to not keep
31// them around.
32class UniscribeState {
33 public:
34 // Initializes this Uniscribe run with the text pointed to by |run| with
35 // |length|. The input is NOT null terminated.
36 //
37 // The is_rtl flag should be set if the input script is RTL. It is assumed
38 // that the caller has already divided up the input text (using ICU, for
39 // example) into runs of the same direction of script. This avoids
40 // disagreements between the caller and Uniscribe later (see FillItems).
41 //
42 // A script cache should be provided by the caller that is initialized to
43 // NULL. When the caller is done with the cache (it may be stored between
44 // runs as long as it is used consistently with the same HFONT), it should
45 // call ScriptFreeCache().
46 UniscribeState(const wchar_t* input,
47 int input_length,
48 bool is_rtl,
49 HFONT hfont,
50 SCRIPT_CACHE* script_cache,
51 SCRIPT_FONTPROPERTIES* font_properties);
52
53 virtual ~UniscribeState();
54
55 // Sets Uniscribe's directional override flag. False by default.
56 bool directional_override() const {
57 return directional_override_;
58 }
59 void set_directional_override(bool override) {
60 directional_override_ = override;
61 }
62
63 // Set's Uniscribe's no-ligate override flag. False by default.
64 bool inhibit_ligate() const {
65 return inhibit_ligate_;
66 }
67 void set_inhibit_ligate(bool inhibit) {
68 inhibit_ligate_ = inhibit;
69 }
70
71 // Set letter spacing. We will try to insert this much space between
72 // graphemes (one or more glyphs perceived as a single unit by ordinary users
73 // of a script). Positive values increase letter spacing, negative values
74 // decrease it. 0 by default.
75 int letter_spacing() const {
76 return letter_spacing_;
77 }
78 void set_letter_spacing(int letter_spacing) {
79 letter_spacing_ = letter_spacing;
80 }
81
82 // Set the width of a standard space character. We use this to normalize
83 // space widths. Windows will make spaces after Hindi characters larger than
84 // other spaces. A space_width of 0 means to use the default space width.
85 //
86 // Must be set before Init() is called.
87 int space_width() const {
88 return space_width_;
89 }
90 void set_space_width(int space_width) {
91 space_width_ = space_width;
92 }
93
94 // Set word spacing. We will try to insert this much extra space between
95 // each word in the input (beyond whatever whitespace character separates
96 // words). Positive values lead to increased letter spacing, negative values
97 // decrease it. 0 by default.
98 //
99 // Must be set before Init() is called.
100 int word_spacing() const {
101 return word_spacing_;
102 }
103 void set_word_spacing(int word_spacing) {
104 word_spacing_ = word_spacing;
105 }
106 void set_ascent(int ascent) {
107 ascent_ = ascent;
108 }
109
110 // You must call this after setting any options but before doing any
111 // other calls like asking for widths or drawing.
112 void Init() { InitWithOptionalLengthProtection(true); }
113
114 // Returns the total width in pixels of the text run.
115 int Width() const;
116
117 // Call to justify the text, with the amount of space that should be ADDED to
118 // get the desired width that the column should be justified to. Normally,
119 // spaces are inserted, but for Arabic there will be kashidas (extra strokes)
120 // inserted instead.
121 //
122 // This function MUST be called AFTER Init().
123 void Justify(int additional_space);
124
125 // Computes the given character offset into a pixel offset of the beginning
126 // of that character.
127 int CharacterToX(int offset) const;
128
129 // Converts the given pixel X position into a logical character offset into
130 // the run. For positions appearing before the first character, this will
131 // return -1.
132 int XToCharacter(int x) const;
133
134 // Draws the given characters to (x, y) in the given DC. The font will be
135 // handled by this function, but the font color and other attributes should
136 // be pre-set.
137 //
138 // The y position is the upper left corner, NOT the baseline.
139 void Draw(HDC dc, int x, int y, int from, int to);
140
141 // Returns the first glyph assigned to the character at the given offset.
142 // This function is used to retrieve glyph information when Uniscribe is
143 // being used to generate glyphs for non-complex, non-BMP (above U+FFFF)
144 // characters. These characters are not otherwise special and have no
145 // complex shaping rules, so we don't otherwise need Uniscribe, except
146 // Uniscribe is the only way to get glyphs for non-BMP characters.
147 //
148 // Returns 0 if there is no glyph for the given character.
149 WORD FirstGlyphForCharacter(int char_offset) const;
150
151 protected:
152 // Backend for init. The flag allows the unit test to specify whether we
153 // should fail early for very long strings like normal, or try to pass the
154 // long string to Uniscribe. The latter provides a way to force failure of
155 // shaping.
156 void InitWithOptionalLengthProtection(bool length_protection);
157
158 // Tries to preload the font when the it is not accessible.
159 // This is the default implementation and it does not do anything.
160 virtual void TryToPreloadFont(HFONT font) {}
161
162 private:
163 FRIEND_TEST(UniscribeTest, TooBig);
164
165 // An array corresponding to each item in runs_ containing information
166 // on each of the glyphs that were generated. Like runs_, this is in
167 // reading order. However, for rtl text, the characters within each
168 // item will be reversed.
169 struct Shaping {
170 Shaping()
171 : pre_padding(0),
172 hfont_(NULL),
173 script_cache_(NULL),
174 ascent_offset_(0) {
175 abc.abcA = 0;
176 abc.abcB = 0;
177 abc.abcC = 0;
178 }
179
180 // Returns the number of glyphs (which will be drawn to the screen)
181 // in this run.
182 int glyph_length() const {
183 return static_cast<int>(glyphs->size());
184 }
185
186 // Returns the number of characters (that we started with) in this run.
187 int char_length() const {
188 return static_cast<int>(logs->size());
189 }
190
191 // Returns the advance array that should be used when measuring glyphs.
192 // The returned pointer will indicate an array with glyph_length() elements
193 // and the advance that should be used for each one. This is either the
194 // real advance, or the justified advances if there is one, and is the
195 // array we want to use for measurement.
196 const int* effective_advances() const {
197 if (advance->empty())
198 return 0;
199 if (justify->empty())
200 return &advance[0];
201 return &justify[0];
202 }
203
204 // This is the advance amount of space that we have added to the beginning
205 // of the run. It is like the ABC's |A| advance but one that we create and
206 // must handle internally whenever computing with pixel offsets.
207 int pre_padding;
208
209 // Glyph indices in the font used to display this item. These indices
210 // are in screen order.
211 StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> glyphs;
212
213 // For each input character, this tells us the first glyph index it
214 // generated. This is the only array with size of the input chars.
215 //
216 // All offsets are from the beginning of this run. Multiple characters can
217 // generate one glyph, in which case there will be adjacent duplicates in
218 // this list. One character can also generate multiple glyphs, in which
219 // case there will be skipped indices in this list.
220 StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> logs;
221
222 // Flags and such for each glyph.
223 StackVector<SCRIPT_VISATTR, UNISCRIBE_STATE_STACK_CHARS> visattr;
224
225 // Horizontal advances for each glyph listed above, this is basically
226 // how wide each glyph is.
227 StackVector<int, UNISCRIBE_STATE_STACK_CHARS> advance;
228
229 // This contains glyph offsets, from the nominal position of a glyph. It
230 // is used to adjust the positions of multiple combining characters
231 // around/above/below base characters in a context-sensitive manner so
232 // that they don't bump against each other and the base character.
233 StackVector<GOFFSET, UNISCRIBE_STATE_STACK_CHARS> offsets;
234
235 // Filled by a call to Justify, this is empty for nonjustified text.
236 // If nonempty, this contains the array of justify characters for each
237 // character as returned by ScriptJustify.
238 //
239 // This is the same as the advance array, but with extra space added for
240 // some characters. The difference between a glyph's |justify| width and
241 // it's |advance| width is the extra space added.
242 StackVector<int, UNISCRIBE_STATE_STACK_CHARS> justify;
243
244 // Sizing information for this run. This treats the entire run as a
245 // character with a preceeding advance, width, and ending advance.
246 // The B width is the sum of the |advance| array, and the A and C widths
247 // are any extra spacing applied to each end.
248 //
249 // It is unclear from the documentation what this actually means. From
250 // experimentation, it seems that the sum of the character advances is
251 // always the sum of the ABC values, and I'm not sure what you're supposed
252 // to do with the ABC values.
253 ABC abc;
254
255 // Pointers to windows font data used to render this run.
256 HFONT hfont_;
257 SCRIPT_CACHE* script_cache_;
258
259 // Ascent offset between the ascent of the primary font
260 // and that of the fallback font. The offset needs to be applied,
261 // when drawing a string, to align multiple runs rendered with
262 // different fonts.
263 int ascent_offset_;
264 };
265
266 // Computes the runs_ array from the text run.
267 void FillRuns();
268
269 // Computes the shapes_ array given an runs_ array already filled in.
270 void FillShapes();
271
272 // Fills in the screen_order_ array (see below).
273 void FillScreenOrder();
274
275 // Called to update the glyph positions based on the current spacing options
276 // that are set.
277 void ApplySpacing();
278
279 // Normalizes all advances for spaces to the same width. This keeps windows
280 // from making spaces after Hindi characters larger, which is then
281 // inconsistent with our meaure of the width since WebKit doesn't include
282 // spaces in text-runs sent to uniscribe unless white-space:pre.
283 void AdjustSpaceAdvances();
284
285 // Returns the total width of a single item.
286 int AdvanceForItem(int item_index) const;
287
288 // Shapes a run (pointed to by |input|) using |hfont| first.
289 // Tries a series of fonts specified retrieved with NextWinFontData
290 // and finally a font covering characters in |*input|. A string pointed
291 // by |input| comes from ScriptItemize and is supposed to contain
292 // characters belonging to a single script aside from characters
293 // common to all scripts (e.g. space).
294 bool Shape(const wchar_t* input,
295 int item_length,
296 int num_glyphs,
297 SCRIPT_ITEM& run,
298 Shaping& shaping);
299
300 // Gets Windows font data for the next best font to try in the list
301 // of fonts. When there's no more font available, returns false
302 // without touching any of out params. Need to call ResetFontIndex
303 // to start scanning of the font list from the beginning.
304 virtual bool NextWinFontData(HFONT* hfont,
305 SCRIPT_CACHE** script_cache,
306 SCRIPT_FONTPROPERTIES** font_properties,
307 int* ascent) {
308 return false;
309 }
310
311 // Resets the font index to the first in the list of fonts
312 // to try after the primaryFont turns out not to work. With font_index
313 // reset, NextWinFontData scans fallback fonts from the beginning.
314 virtual void ResetFontIndex() {}
315
316 // The input data for this run of Uniscribe. See the constructor.
317 const wchar_t* input_;
318 const int input_length_;
319 const bool is_rtl_;
320
321 // Windows font data for the primary font :
322 // In a sense, logfont_ and style_ are redundant because
323 // hfont_ contains all the information. However, invoking GetObject,
324 // everytime we need the height and the style, is rather expensive so
325 // that we cache them. Would it be better to add getter and (virtual)
326 // setter for the height and the style of the primary font, instead of
327 // logfont_? Then, a derived class ctor can set ascent_, height_ and style_
328 // if they're known. Getters for them would have to 'infer' their values from
329 // hfont_ ONLY when they're not set.
330 HFONT hfont_;
331 SCRIPT_CACHE* script_cache_;
332 SCRIPT_FONTPROPERTIES* font_properties_;
333 int ascent_;
334 LOGFONT logfont_;
335 int style_;
336
337 // Options, see the getters/setters above.
338 bool directional_override_;
339 bool inhibit_ligate_;
340 int letter_spacing_;
341 int space_width_;
342 int word_spacing_;
343 int justification_width_;
344
345 // Uniscribe breaks the text into Runs. These are one length of text that is
346 // in one script and one direction. This array is in reading order.
347 StackVector<SCRIPT_ITEM, UNISCRIBE_STATE_STACK_RUNS> runs_;
348
349 StackVector<Shaping, UNISCRIBE_STATE_STACK_RUNS> shapes_;
350
351 // This is a mapping between reading order and screen order for the items.
352 // Uniscribe's items array are in reading order. For right-to-left text,
353 // or mixed (although WebKit's |TextRun| should really be only one
354 // direction), this makes it very difficult to compute character offsets
355 // and positions. This list is in screen order from left to right, and
356 // gives the index into the |runs_| and |shapes_| arrays of each
357 // subsequent item.
358 StackVector<int, UNISCRIBE_STATE_STACK_RUNS> screen_order_;
359
360 DISALLOW_EVIL_CONSTRUCTORS(UniscribeState);
361};
362
363} // namespace gfx
364
365#endif // BASE_GFX_UNISCRIBE_H__
license.botf003cfe2008-08-24 09:55:55 +0900366