// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
initial.commit3f4a7322008-07-27 06:49:38 +09004
5#include "base/gfx/font_utils.h"
6
7#include <limits>
8#include <map>
9
10#include "base/gfx/uniscribe.h"
11#include "base/logging.h"
12#include "base/singleton.h"
13#include "base/string_util.h"
14#include "unicode/locid.h"
jungshik@google.com3ce34ed2008-08-08 10:36:12 +090015#include "unicode/uchar.h"
initial.commit3f4a7322008-07-27 06:49:38 +090016
17namespace gfx {
18
19namespace {
20
21// hash_map has extra cost with no sizable gain for a small number of integer
22// key items. When the map size becomes much bigger (which will be later as
23// more scripts are added) and this turns out to be prominent in the profile, we
24// may consider switching to hash_map (or just an array if we support all the
25// scripts)
26typedef std::map<UScriptCode, const wchar_t*> ScriptToFontMap;
27
28struct ScriptToFontMapSingletonTraits
29 : public DefaultSingletonTraits<ScriptToFontMap> {
30 static ScriptToFontMap* New() {
31 struct FontMap {
32 UScriptCode script;
33 const wchar_t* family;
34 };
35
36 const static FontMap font_map[] = {
jungshik@google.com3ce34ed2008-08-08 10:36:12 +090037 {USCRIPT_LATIN, L"times new roman"},
38 {USCRIPT_GREEK, L"times new roman"},
39 {USCRIPT_CYRILLIC, L"times new roman"},
initial.commit3f4a7322008-07-27 06:49:38 +090040 {USCRIPT_SIMPLIFIED_HAN, L"simsun"},
jungshik@google.com3ce34ed2008-08-08 10:36:12 +090041 //{USCRIPT_TRADITIONAL_HAN, L"pmingliu"},
initial.commit3f4a7322008-07-27 06:49:38 +090042 {USCRIPT_HIRAGANA, L"ms pgothic"},
43 {USCRIPT_KATAKANA, L"ms pgothic"},
44 {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"},
45 {USCRIPT_HANGUL, L"gulim"},
46 {USCRIPT_THAI, L"tahoma"},
47 {USCRIPT_HEBREW, L"david"},
jungshik@google.com3ce34ed2008-08-08 10:36:12 +090048 {USCRIPT_ARABIC, L"tahoma"},
initial.commit3f4a7322008-07-27 06:49:38 +090049 {USCRIPT_DEVANAGARI, L"mangal"},
50 {USCRIPT_BENGALI, L"vrinda"},
51 {USCRIPT_GURMUKHI, L"raavi"},
52 {USCRIPT_GUJARATI, L"shruti"},
53 {USCRIPT_ORIYA, L"kalinga"},
54 {USCRIPT_TAMIL, L"latha"},
55 {USCRIPT_TELUGU, L"gautami"},
56 {USCRIPT_KANNADA, L"tunga"},
57 {USCRIPT_MALAYALAM, L"kartika"},
58 {USCRIPT_LAO, L"dokchampa"},
59 {USCRIPT_TIBETAN, L"microsoft himalaya"},
60 {USCRIPT_GEORGIAN, L"sylfaen"},
61 {USCRIPT_ARMENIAN, L"sylfaen"},
62 {USCRIPT_ETHIOPIC, L"nyala"},
63 {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"},
64 {USCRIPT_CHEROKEE, L"plantagenet cherokee"},
65 {USCRIPT_YI, L"microsoft yi balti"},
66 {USCRIPT_SINHALA, L"iskoola pota"},
67 {USCRIPT_SYRIAC, L"estrangelo edessa"},
68 {USCRIPT_KHMER, L"daunpenh"},
69 {USCRIPT_THAANA, L"mv boli"},
70 {USCRIPT_MONGOLIAN, L"mongolian balti"},
71 // For common, perhaps we should return a font
72 // for the current application/system locale.
73 //{USCRIPT_COMMON, L"times new roman"}
74 };
75
76 ScriptToFontMap* new_instance = new ScriptToFontMap;
77 // Cannot recover from OOM so that there's no need to check.
78 for (int i = 0; i < arraysize(font_map); ++i)
79 (*new_instance)[font_map[i].script] = font_map[i].family;
80
81 // Initialize the locale-dependent mapping.
82 // Since Chrome synchronizes the ICU default locale with its UI locale,
83 // this ICU locale tells the current UI locale of Chrome.
84 Locale locale = Locale::getDefault();
85 ScriptToFontMap::const_iterator iter;
jungshik@google.com3ce34ed2008-08-08 10:36:12 +090086 if (locale == Locale::getJapanese()) {
87 iter = new_instance->find(USCRIPT_HIRAGANA);
88 } else if (locale == Locale::getKorean()) {
initial.commit3f4a7322008-07-27 06:49:38 +090089 iter = new_instance->find(USCRIPT_HANGUL);
initial.commit3f4a7322008-07-27 06:49:38 +090090 } else {
jungshik@google.com3ce34ed2008-08-08 10:36:12 +090091 // Use Simplified Chinese font for all other locales including
92 // Traditional Chinese because Simsun (SC font) has a wider
93 // coverage (covering both SC and TC) than PMingLiu (TC font).
94 // This also speeds up the TC version of Chrome when rendering SC pages.
initial.commit3f4a7322008-07-27 06:49:38 +090095 iter = new_instance->find(USCRIPT_SIMPLIFIED_HAN);
96 }
97 if (iter != new_instance->end())
98 (*new_instance)[USCRIPT_HAN] = iter->second;
99
100 return new_instance;
101 }
102};
103
104Singleton<ScriptToFontMap, ScriptToFontMapSingletonTraits> script_font_map;
105
106const int kUndefinedAscent = std::numeric_limits<int>::min();
107
108// Given an HFONT, return the ascent. If GetTextMetrics fails,
109// kUndefinedAscent is returned, instead.
110int GetAscent(HFONT hfont) {
111 HDC dc = GetDC(NULL);
112 HGDIOBJ oldFont = SelectObject(dc, hfont);
113 TEXTMETRIC tm;
114 BOOL got_metrics = GetTextMetrics(dc, &tm);
115 SelectObject(dc, oldFont);
116 ReleaseDC(NULL, dc);
117 return got_metrics ? tm.tmAscent : kUndefinedAscent;
118}
119
120struct FontData {
121 FontData() : hfont(NULL), ascent(kUndefinedAscent), script_cache(NULL) {}
122 HFONT hfont;
123 int ascent;
124 mutable SCRIPT_CACHE script_cache;
125};
126
127// Again, using hash_map does not earn us much here.
128// page_cycler_test intl2 gave us a 'better' result with map than with hash_map
129// even though they're well-within 1-sigma of each other so that the difference
130// is not significant. On the other hand, some pages in intl2 seem to
131// take longer to load with map in the 1st pass. Need to experiment further.
132typedef std::map<std::wstring, FontData*> FontDataCache;
133struct FontDataCacheSingletonTraits
134 : public DefaultSingletonTraits<FontDataCache> {
135 static void Delete(FontDataCache* cache) {
136 FontDataCache::iterator iter = cache->begin();
137 while (iter != cache->end()) {
138 SCRIPT_CACHE script_cache = iter->second->script_cache;
139 if (script_cache)
140 ScriptFreeCache(&script_cache);
141 delete iter->second;
142 ++iter;
143 }
144 delete cache;
145 }
146};
147
148} // namespace
149
150// TODO(jungshik) : this is font fallback code version 0.1
151// - Cover all the scripts
152// - Get the default font for each script/generic family from the
153// preference instead of hardcoding in the source.
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900154// (at least, read values from the registry for IE font settings).
initial.commit3f4a7322008-07-27 06:49:38 +0900155// - Support generic families (from FontDescription)
156// - If the default font for a script is not available,
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900157// try some more fonts known to support it. Finally, we can
initial.commit3f4a7322008-07-27 06:49:38 +0900158// use EnumFontFamilies or similar APIs to come up with a list of
159// fonts supporting the script and cache the result.
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900160// - Consider using UnicodeSet (or UnicodeMap) converted from
161// GLYPHSET (BMP) or directly read from truetype cmap tables to
162// keep track of which character is supported by which font
initial.commit3f4a7322008-07-27 06:49:38 +0900163// - Update script_font_cache in response to WM_FONTCHANGE
164
165const wchar_t* GetFontFamilyForScript(UScriptCode script,
166 GenericFamilyType generic) {
167 ScriptToFontMap::const_iterator iter = script_font_map->find(script);
168 const wchar_t* family = NULL;
169 if (iter != script_font_map->end()) {
170 family = iter->second;
171 }
172 return family;
173}
174
175// TODO(jungshik)
176// - Handle 'Inherited', 'Common' and 'Unknown'
177// (see http://www.unicode.org/reports/tr24/#Usage_Model )
178// For 'Inherited' and 'Common', perhaps we need to
179// accept another parameter indicating the previous family
180// and just return it.
181// - All the characters (or characters up to the point a single
182// font can cover) need to be taken into account
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900183const wchar_t* GetFallbackFamily(const wchar_t *characters,
initial.commit3f4a7322008-07-27 06:49:38 +0900184 int length,
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900185 GenericFamilyType generic,
186 UChar32 *char_checked,
187 UScriptCode *script_checked) {
initial.commit3f4a7322008-07-27 06:49:38 +0900188 DCHECK(characters && characters[0] && length > 0);
189 UScriptCode script = USCRIPT_COMMON;
190
191 // Sometimes characters common to script (e.g. space) is at
192 // the beginning of a string so that we need to skip them
193 // to get a font required to render the string.
194 int i = 0;
195 UChar32 ucs4 = 0;
196 while (i < length && script == USCRIPT_COMMON ||
197 script == USCRIPT_INVALID_CODE) {
198 U16_NEXT(characters, i, length, ucs4);
199 UErrorCode err = U_ZERO_ERROR;
200 script = uscript_getScript(ucs4, &err);
201 // silently ignore the error
202 }
203
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900204 // hack for full width ASCII. For the full-width ASCII, use the font
205 // for Han (which is locale-dependent).
initial.commit3f4a7322008-07-27 06:49:38 +0900206 if (0xFF00 < ucs4 && ucs4 < 0xFF5F)
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900207 script = USCRIPT_HAN;
initial.commit3f4a7322008-07-27 06:49:38 +0900208
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900209 // There are a lot of characters in USCRIPT_COMMON that can be covered
210 // by fonts for scripts closely related to them.
211 // See http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:]
212 // TODO(jungshik): make this more efficient with a wider coverage
213 // (Armenian, Georgian, Devanagari, etc)
214 if (script == USCRIPT_COMMON || script == USCRIPT_INHERITED) {
215 UBlockCode block = ublock_getCode(ucs4);
216 switch (block) {
217 case UBLOCK_BASIC_LATIN:
218 script = USCRIPT_LATIN;
219 break;
220 case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
221 script = USCRIPT_HAN;
222 break;
223 case UBLOCK_HIRAGANA:
224 case UBLOCK_KATAKANA:
225 script = USCRIPT_HIRAGANA;
226 case UBLOCK_ARABIC:
227 script = USCRIPT_ARABIC;
228 }
229 }
230
231 // Another lame work-around to cover non-BMP characters.
initial.commit3f4a7322008-07-27 06:49:38 +0900232 const wchar_t* family = GetFontFamilyForScript(script, generic);
233 if (!family) {
234 int plane = ucs4 >> 16;
235 switch (plane) {
236 case 1:
237 family = L"code2001";
238 break;
239 case 2:
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900240 family = L"simsun-extb";
initial.commit3f4a7322008-07-27 06:49:38 +0900241 break;
242 default:
243 family = L"arial unicode ms";
244 }
245 }
246
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900247 if (char_checked) *char_checked = ucs4;
248 if (script_checked) *script_checked = script;
initial.commit3f4a7322008-07-27 06:49:38 +0900249 return family;
250}
251
252
253
254// Be aware that this is not thread-safe.
255bool GetDerivedFontData(const wchar_t *family,
256 int style,
257 LOGFONT *logfont,
258 int *ascent,
259 HFONT *hfont,
260 SCRIPT_CACHE **script_cache) {
261 DCHECK(logfont && family && *family);
262 // Using |Singleton| here is not free, but the intl2 page cycler test
263 // does not show any noticeable difference with and without it. Leaking
264 // the contents of FontDataCache (especially SCRIPT_CACHE) at the end
265 // of a renderer process may not be a good idea. We may use
266 // atexit(). However, with no noticeable performance difference, |Singleton|
267 // is cleaner, I believe.
268 FontDataCache* font_data_cache =
269 Singleton<FontDataCache, FontDataCacheSingletonTraits>::get();
270 // TODO(jungshik) : This comes up pretty high in the profile so that
271 // we need to measure whether using SHA256 (after coercing all the
272 // fields to char*) is faster than StringPrintf.
mmentovai@google.comaeff9442008-08-14 09:41:45 +0900273 std::wstring font_key = StringPrintf(L"%1d:%d:%ls", style, logfont->lfHeight,
initial.commit3f4a7322008-07-27 06:49:38 +0900274 family);
275 FontDataCache::const_iterator iter = font_data_cache->find(font_key);
276 FontData *derived;
277 if (iter == font_data_cache->end()) {
278 DCHECK(wcslen(family) < LF_FACESIZE);
279 wcscpy_s(logfont->lfFaceName, LF_FACESIZE, family);
280 // TODO(jungshik): CreateFontIndirect always comes up with
281 // a font even if there's no font matching the name. Need to
282 // check it against what we actually want (as is done in FontCacheWin.cpp)
283 derived = new FontData;
284 derived->hfont = CreateFontIndirect(logfont);
285 // GetAscent may return kUndefinedAscent, but we still want to
286 // cache it so that we won't have to call CreateFontIndirect once
287 // more for HFONT next time.
288 derived->ascent = GetAscent(derived->hfont);
289 (*font_data_cache)[font_key] = derived;
290 } else {
291 derived = iter->second;
292 // Last time, GetAscent failed so that only HFONT was
293 // cached. Try once more assuming that TryPreloadFont
294 // was called by a caller between calls.
295 if (kUndefinedAscent == derived->ascent)
296 derived->ascent = GetAscent(derived->hfont);
297 }
298 *hfont = derived->hfont;
299 *ascent = derived->ascent;
300 *script_cache = &(derived->script_cache);
301 return *ascent != kUndefinedAscent;
302}
303
304int GetStyleFromLogfont(const LOGFONT* logfont) {
305 // TODO(jungshik) : consider defining UNDEFINED or INVALID for style and
306 // returning it when logfont is NULL
307 if (!logfont) {
308 NOTREACHED();
309 return FONT_STYLE_NORMAL;
310 }
311 return (logfont->lfItalic ? FONT_STYLE_ITALIC : FONT_STYLE_NORMAL) |
312 (logfont->lfUnderline ? FONT_STYLE_UNDERLINED : FONT_STYLE_NORMAL) |
313 (logfont->lfWeight >= 700 ? FONT_STYLE_BOLD : FONT_STYLE_NORMAL);
314}
315
316} // namespace gfx
license.botf003cfe2008-08-24 09:55:55 +0900317