blob: 162e577eaa2ce5e9f2b26adab5b86b1738c2949f [file] [log] [blame]
// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// A wrapper around Uniscribe that provides a reasonable API.
#ifndef BASE_GFX_UNISCRIBE_H__
#define BASE_GFX_UNISCRIBE_H__
#include <windows.h>
#include <usp10.h>
#include <wchar.h>
#include <map>
#include <vector>
#include "base/stack_container.h"
#include "testing/gtest/include/gtest/gtest_prod.h"
namespace gfx {
// Capacity arguments handed to the StackVector members declared below.
// UNISCRIBE_STATE_STACK_RUNS sizes the per-run arrays of UniscribeState
// (runs_, shapes_ and screen_order_).
#define UNISCRIBE_STATE_STACK_RUNS 8
// UNISCRIBE_STATE_STACK_CHARS sizes the per-glyph / per-character arrays held
// by each UniscribeState::Shaping (glyphs, logs, visattr, advance, offsets
// and justify).
#define UNISCRIBE_STATE_STACK_CHARS 32
// This object should be safe to create & destroy frequently, as long as the
// caller preserves the script_cache when possible (this data may be slow to
// compute).
//
// This object is "kind of large" (~1K) because it reserves a lot of working
// space up front to avoid expensive heap operations. Therefore, not only
// should you not worry about creating and destroying it, you should also try
// not to keep instances of it around.
class UniscribeState {
 public:
  // Prepares a Uniscribe run over the |input_length| wide characters that
  // start at |input|. The buffer is NOT expected to be null terminated.
  //
  // Pass is_rtl = true when the script of the input is right-to-left. The
  // caller is assumed to have already split its text (with ICU, for example)
  // into runs that share one script direction, which keeps the caller and
  // Uniscribe from disagreeing later on.
  // NOTE(review): the earlier wording of this comment pointed at "FillItems";
  // no method of that name is declared in this class. FillRuns() looks like
  // the intended reference -- confirm.
  //
  // |script_cache| must point at a cache that the caller initialized to NULL.
  // The cache may be kept between runs as long as it is always used with the
  // same HFONT; when the caller is finished with it, it should call
  // ScriptFreeCache().
  UniscribeState(const wchar_t* input,
                 int input_length,
                 bool is_rtl,
                 HFONT hfont,
                 SCRIPT_CACHE* script_cache,
                 SCRIPT_FONTPROPERTIES* font_properties);

  virtual ~UniscribeState();

  // Uniscribe's directional override flag. False by default.
  bool directional_override() const { return directional_override_; }
  void set_directional_override(bool override) {
    directional_override_ = override;
  }

  // Uniscribe's no-ligate override flag. False by default.
  bool inhibit_ligate() const { return inhibit_ligate_; }
  void set_inhibit_ligate(bool inhibit) { inhibit_ligate_ = inhibit; }

  // Letter spacing: the amount of space we try to insert between graphemes
  // (one or more glyphs that ordinary users of a script perceive as a single
  // unit). Positive values widen the spacing, negative values tighten it.
  // 0 by default.
  int letter_spacing() const { return letter_spacing_; }
  void set_letter_spacing(int letter_spacing) {
    letter_spacing_ = letter_spacing;
  }

  // Width of a standard space character, used to normalize space widths
  // (Windows makes spaces that follow Hindi characters larger than other
  // spaces). A space_width of 0 means "use the default space width".
  //
  // Must be set before Init() is called.
  int space_width() const { return space_width_; }
  void set_space_width(int space_width) { space_width_ = space_width; }

  // Word spacing: the amount of extra space we try to insert between each
  // pair of words in the input, in addition to whatever whitespace character
  // separates them. Positive values increase the spacing, negative values
  // decrease it. 0 by default.
  //
  // Must be set before Init() is called.
  int word_spacing() const { return word_spacing_; }
  void set_word_spacing(int word_spacing) { word_spacing_ = word_spacing; }

  // Stores |ascent| in ascent_, the ascent value kept with the primary font
  // data below.
  void set_ascent(int ascent) { ascent_ = ascent; }

  // Must be called once all options have been set and before any other call
  // such as asking for widths or drawing. Forwards to the backend with length
  // protection turned on.
  void Init() {
    InitWithOptionalLengthProtection(true);
  }

  // Total width of the text run, in pixels.
  int Width() const;

  // Justifies the text. |additional_space| is the amount of space that should
  // be ADDED to reach the width the column is being justified to. Normally
  // spaces are inserted; for Arabic, kashidas (extra strokes) are inserted
  // instead.
  //
  // This function MUST be called AFTER Init().
  void Justify(int additional_space);

  // Maps the character offset |offset| to the pixel offset at which that
  // character begins.
  int CharacterToX(int offset) const;

  // Maps the pixel X position |x| to a logical character offset into the run.
  // Positions that fall before the first character yield -1.
  int XToCharacter(int x) const;

  // Draws the requested characters at (x, y) in |dc|. The font is taken care
  // of by this function; the font color and other attributes should already
  // be set on the DC.
  //
  // The y position is the upper left corner, NOT the baseline.
  void Draw(HDC dc, int x, int y, int from, int to);

  // Returns the first glyph assigned to the character at |char_offset|, or 0
  // when there is no glyph for that character.
  //
  // This exists for the case where Uniscribe is only being used to produce
  // glyphs for non-complex, non-BMP (above U+FFFF) characters. Such characters
  // have no complex shaping rules, so Uniscribe would not otherwise be
  // needed, but it is the only way to get glyphs for non-BMP characters.
  WORD FirstGlyphForCharacter(int char_offset) const;

 protected:
  // Backend for Init(). |length_protection| lets the unit test choose between
  // failing early on very long strings (the normal behavior) and handing the
  // long string to Uniscribe anyway; the latter is a way to force shaping to
  // fail.
  void InitWithOptionalLengthProtection(bool length_protection);

  // Tries to preload the font when it is not accessible. This default
  // implementation does nothing.
  virtual void TryToPreloadFont(HFONT font) {}

 private:
  // Gives the UniscribeTest.TooBig test access to non-public members.
  FRIEND_TEST(UniscribeTest, TooBig);

  // Glyph information generated for one item of runs_. shapes_ holds one
  // Shaping per entry of runs_ and, like runs_, is in reading order. For RTL
  // text, however, the characters within each item are reversed.
  struct Shaping {
    Shaping()
        : pre_padding(0),
          hfont_(NULL),
          script_cache_(NULL),
          ascent_offset_(0) {
      // Start with an all-zero ABC.
      abc.abcA = abc.abcC = 0;
      abc.abcB = 0;
    }

    // Number of glyphs (what actually gets drawn to the screen) in this run.
    int glyph_length() const { return static_cast<int>(glyphs->size()); }

    // Number of characters (what we started with) in this run.
    int char_length() const { return static_cast<int>(logs->size()); }

    // Returns the advance array to use when measuring glyphs: an array of
    // glyph_length() elements giving the advance for each glyph. The
    // justified advances are used when present, otherwise the real advances.
    // Returns NULL when no advances have been stored.
    const int* effective_advances() const {
      if (advance->empty())
        return NULL;
      return justify->empty() ? &advance[0] : &justify[0];
    }

    // Advance that we have added to the beginning of the run. It resembles
    // the ABC's |A| advance, but it is one that we create ourselves and must
    // account for internally whenever computing with pixel offsets.
    int pre_padding;

    // Glyph indices, in the font used to display this item. These are in
    // screen order.
    StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> glyphs;

    // For every input character, the index of the first glyph it produced.
    // This is the only array sized by the number of input characters.
    //
    // All offsets are relative to the beginning of this run. Several
    // characters may produce a single glyph, giving adjacent duplicates in
    // this list; a single character may also produce several glyphs, giving
    // skipped indices in this list.
    StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> logs;

    // Flags and such for each glyph.
    StackVector<SCRIPT_VISATTR, UNISCRIBE_STATE_STACK_CHARS> visattr;

    // Horizontal advance of each glyph listed above; basically how wide each
    // glyph is.
    StackVector<int, UNISCRIBE_STATE_STACK_CHARS> advance;

    // Offsets of each glyph from its nominal position. Used to adjust the
    // positions of multiple combining characters around/above/below base
    // characters in a context-sensitive manner, so that they do not bump
    // into each other or into the base character.
    StackVector<GOFFSET, UNISCRIBE_STATE_STACK_CHARS> offsets;

    // Filled in by a call to Justify; empty for text that is not justified.
    // When nonempty, it holds the justified values returned by ScriptJustify.
    //
    // It is the same as the |advance| array with extra space added to some
    // entries: the difference between a glyph's |justify| width and its
    // |advance| width is the extra space that was added.
    StackVector<int, UNISCRIBE_STATE_STACK_CHARS> justify;

    // Sizing information for this run, treating the whole run as a single
    // character with a preceding advance, a width, and an ending advance.
    // The B width is the sum of the |advance| array, and the A and C widths
    // are any extra spacing applied to each end.
    //
    // The documentation does not make clear what this actually means.
    // Experimentally, the sum of the character advances always seems to equal
    // the sum of the ABC values, and it is not obvious what one is supposed
    // to do with the ABC values.
    ABC abc;

    // Pointers to the Windows font data used to render this run.
    HFONT hfont_;
    SCRIPT_CACHE* script_cache_;

    // Difference between the ascent of the primary font and that of the
    // fallback font. It has to be applied when drawing a string so that
    // multiple runs rendered with different fonts line up.
    int ascent_offset_;
  };

  // Computes the runs_ array from the text run.
  void FillRuns();

  // Computes the shapes_ array, given a runs_ array that is already filled
  // in.
  void FillShapes();

  // Fills in the screen_order_ array (see below).
  void FillScreenOrder();

  // Updates the glyph positions according to the spacing options that are
  // currently set.
  void ApplySpacing();

  // Normalizes the advances of all spaces to the same width. This stops
  // Windows from making spaces after Hindi characters larger, which would be
  // inconsistent with our measure of the width, since WebKit doesn't include
  // spaces in text-runs sent to Uniscribe unless white-space:pre.
  void AdjustSpaceAdvances();

  // Total width of a single item.
  int AdvanceForItem(int item_index) const;

  // Shapes a run (pointed to by |input|), trying |hfont| first, then the
  // series of fonts retrieved with NextWinFontData, and finally a font that
  // covers the characters in |*input|. The string pointed to by |input|
  // comes from ScriptItemize and is supposed to contain characters of a
  // single script, apart from characters common to all scripts (e.g. space).
  bool Shape(const wchar_t* input,
             int item_length,
             int num_glyphs,
             SCRIPT_ITEM& run,
             Shaping& shaping);

  // Fetches the Windows font data for the next best font to try in the list
  // of fonts. Returns false, without touching any of the out params, when no
  // more fonts are available. ResetFontIndex must be called to restart the
  // scan of the font list from the beginning. This default implementation
  // always returns false.
  virtual bool NextWinFontData(HFONT* hfont,
                               SCRIPT_CACHE** script_cache,
                               SCRIPT_FONTPROPERTIES** font_properties,
                               int* ascent) {
    return false;
  }

  // Resets the font index to the first entry in the list of fonts to try
  // once the primary font turns out not to work. After a reset,
  // NextWinFontData scans the fallback fonts from the beginning. This default
  // implementation does nothing.
  virtual void ResetFontIndex() {}

  // The input data for this run of Uniscribe. See the constructor.
  const wchar_t* input_;
  const int input_length_;
  const bool is_rtl_;

  // Windows font data for the primary font.
  // In a sense logfont_ and style_ are redundant, because hfont_ contains all
  // of the information. However, invoking GetObject every time the height and
  // the style are needed is rather expensive, so they are cached. Would it be
  // better to add a getter and a (virtual) setter for the height and the
  // style of the primary font instead of logfont_? A derived class ctor could
  // then set ascent_, height_ and style_ when they are known, and the getters
  // would have to 'infer' the values from hfont_ ONLY when they are not set.
  HFONT hfont_;
  SCRIPT_CACHE* script_cache_;
  SCRIPT_FONTPROPERTIES* font_properties_;
  int ascent_;
  LOGFONT logfont_;
  int style_;

  // Options; see the getters/setters above.
  bool directional_override_;
  bool inhibit_ligate_;
  int letter_spacing_;
  int space_width_;
  int word_spacing_;
  // NOTE(review): no accessor in this header; presumably tied to Justify() --
  // confirm against the implementation.
  int justification_width_;

  // Uniscribe breaks the text into runs, each being one length of text in a
  // single script and a single direction. This array is in reading order.
  StackVector<SCRIPT_ITEM, UNISCRIBE_STATE_STACK_RUNS> runs_;
  StackVector<Shaping, UNISCRIBE_STATE_STACK_RUNS> shapes_;

  // Mapping between reading order and screen order for the items.
  // Uniscribe's item array is in reading order. For right-to-left text, or
  // mixed text (although WebKit's |TextRun| should really be only one
  // direction), that makes it very difficult to compute character offsets
  // and positions. This list is in screen order from left to right, and each
  // entry is the index into |runs_| and |shapes_| of the next item.
  StackVector<int, UNISCRIBE_STATE_STACK_RUNS> screen_order_;

  DISALLOW_EVIL_CONSTRUCTORS(UniscribeState);
};
} // namespace gfx
#endif // BASE_GFX_UNISCRIBE_H__