blob: 9ca18524137edba12ec5279e12e2b286f8db16e7 [file] [log] [blame]
initial.commit3f4a7322008-07-27 06:49:38 +09001// Copyright 2008, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14// * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30#include "base/gfx/font_utils.h"
31
32#include <limits>
33#include <map>
34
35#include "base/gfx/uniscribe.h"
36#include "base/logging.h"
37#include "base/singleton.h"
38#include "base/string_util.h"
39#include "unicode/locid.h"
jungshik@google.com3ce34ed2008-08-08 10:36:12 +090040#include "unicode/uchar.h"
initial.commit3f4a7322008-07-27 06:49:38 +090041
42namespace gfx {
43
44namespace {
45
46// hash_map has extra cost with no sizable gain for a small number of integer
47// key items. When the map size becomes much bigger (which will be later as
48// more scripts are added) and this turns out to be prominent in the profile, we
49// may consider switching to hash_map (or just an array if we support all the
50// scripts)
51typedef std::map<UScriptCode, const wchar_t*> ScriptToFontMap;
52
53struct ScriptToFontMapSingletonTraits
54 : public DefaultSingletonTraits<ScriptToFontMap> {
55 static ScriptToFontMap* New() {
56 struct FontMap {
57 UScriptCode script;
58 const wchar_t* family;
59 };
60
61 const static FontMap font_map[] = {
jungshik@google.com3ce34ed2008-08-08 10:36:12 +090062 {USCRIPT_LATIN, L"times new roman"},
63 {USCRIPT_GREEK, L"times new roman"},
64 {USCRIPT_CYRILLIC, L"times new roman"},
initial.commit3f4a7322008-07-27 06:49:38 +090065 {USCRIPT_SIMPLIFIED_HAN, L"simsun"},
jungshik@google.com3ce34ed2008-08-08 10:36:12 +090066 //{USCRIPT_TRADITIONAL_HAN, L"pmingliu"},
initial.commit3f4a7322008-07-27 06:49:38 +090067 {USCRIPT_HIRAGANA, L"ms pgothic"},
68 {USCRIPT_KATAKANA, L"ms pgothic"},
69 {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"},
70 {USCRIPT_HANGUL, L"gulim"},
71 {USCRIPT_THAI, L"tahoma"},
72 {USCRIPT_HEBREW, L"david"},
jungshik@google.com3ce34ed2008-08-08 10:36:12 +090073 {USCRIPT_ARABIC, L"tahoma"},
initial.commit3f4a7322008-07-27 06:49:38 +090074 {USCRIPT_DEVANAGARI, L"mangal"},
75 {USCRIPT_BENGALI, L"vrinda"},
76 {USCRIPT_GURMUKHI, L"raavi"},
77 {USCRIPT_GUJARATI, L"shruti"},
78 {USCRIPT_ORIYA, L"kalinga"},
79 {USCRIPT_TAMIL, L"latha"},
80 {USCRIPT_TELUGU, L"gautami"},
81 {USCRIPT_KANNADA, L"tunga"},
82 {USCRIPT_MALAYALAM, L"kartika"},
83 {USCRIPT_LAO, L"dokchampa"},
84 {USCRIPT_TIBETAN, L"microsoft himalaya"},
85 {USCRIPT_GEORGIAN, L"sylfaen"},
86 {USCRIPT_ARMENIAN, L"sylfaen"},
87 {USCRIPT_ETHIOPIC, L"nyala"},
88 {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"},
89 {USCRIPT_CHEROKEE, L"plantagenet cherokee"},
90 {USCRIPT_YI, L"microsoft yi balti"},
91 {USCRIPT_SINHALA, L"iskoola pota"},
92 {USCRIPT_SYRIAC, L"estrangelo edessa"},
93 {USCRIPT_KHMER, L"daunpenh"},
94 {USCRIPT_THAANA, L"mv boli"},
95 {USCRIPT_MONGOLIAN, L"mongolian balti"},
96 // For common, perhaps we should return a font
97 // for the current application/system locale.
98 //{USCRIPT_COMMON, L"times new roman"}
99 };
100
101 ScriptToFontMap* new_instance = new ScriptToFontMap;
102 // Cannot recover from OOM so that there's no need to check.
103 for (int i = 0; i < arraysize(font_map); ++i)
104 (*new_instance)[font_map[i].script] = font_map[i].family;
105
106 // Initialize the locale-dependent mapping.
107 // Since Chrome synchronizes the ICU default locale with its UI locale,
108 // this ICU locale tells the current UI locale of Chrome.
109 Locale locale = Locale::getDefault();
110 ScriptToFontMap::const_iterator iter;
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900111 if (locale == Locale::getJapanese()) {
112 iter = new_instance->find(USCRIPT_HIRAGANA);
113 } else if (locale == Locale::getKorean()) {
initial.commit3f4a7322008-07-27 06:49:38 +0900114 iter = new_instance->find(USCRIPT_HANGUL);
initial.commit3f4a7322008-07-27 06:49:38 +0900115 } else {
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900116 // Use Simplified Chinese font for all other locales including
117 // Traditional Chinese because Simsun (SC font) has a wider
118 // coverage (covering both SC and TC) than PMingLiu (TC font).
119 // This also speeds up the TC version of Chrome when rendering SC pages.
initial.commit3f4a7322008-07-27 06:49:38 +0900120 iter = new_instance->find(USCRIPT_SIMPLIFIED_HAN);
121 }
122 if (iter != new_instance->end())
123 (*new_instance)[USCRIPT_HAN] = iter->second;
124
125 return new_instance;
126 }
127};
128
129Singleton<ScriptToFontMap, ScriptToFontMapSingletonTraits> script_font_map;
130
131const int kUndefinedAscent = std::numeric_limits<int>::min();
132
133// Given an HFONT, return the ascent. If GetTextMetrics fails,
134// kUndefinedAscent is returned, instead.
135int GetAscent(HFONT hfont) {
136 HDC dc = GetDC(NULL);
137 HGDIOBJ oldFont = SelectObject(dc, hfont);
138 TEXTMETRIC tm;
139 BOOL got_metrics = GetTextMetrics(dc, &tm);
140 SelectObject(dc, oldFont);
141 ReleaseDC(NULL, dc);
142 return got_metrics ? tm.tmAscent : kUndefinedAscent;
143}
144
145struct FontData {
146 FontData() : hfont(NULL), ascent(kUndefinedAscent), script_cache(NULL) {}
147 HFONT hfont;
148 int ascent;
149 mutable SCRIPT_CACHE script_cache;
150};
151
152// Again, using hash_map does not earn us much here.
153// page_cycler_test intl2 gave us a 'better' result with map than with hash_map
154// even though they're well-within 1-sigma of each other so that the difference
155// is not significant. On the other hand, some pages in intl2 seem to
156// take longer to load with map in the 1st pass. Need to experiment further.
157typedef std::map<std::wstring, FontData*> FontDataCache;
158struct FontDataCacheSingletonTraits
159 : public DefaultSingletonTraits<FontDataCache> {
160 static void Delete(FontDataCache* cache) {
161 FontDataCache::iterator iter = cache->begin();
162 while (iter != cache->end()) {
163 SCRIPT_CACHE script_cache = iter->second->script_cache;
164 if (script_cache)
165 ScriptFreeCache(&script_cache);
166 delete iter->second;
167 ++iter;
168 }
169 delete cache;
170 }
171};
172
173} // namespace
174
175// TODO(jungshik) : this is font fallback code version 0.1
176// - Cover all the scripts
177// - Get the default font for each script/generic family from the
178// preference instead of hardcoding in the source.
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900179// (at least, read values from the registry for IE font settings).
initial.commit3f4a7322008-07-27 06:49:38 +0900180// - Support generic families (from FontDescription)
181// - If the default font for a script is not available,
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900182// try some more fonts known to support it. Finally, we can
initial.commit3f4a7322008-07-27 06:49:38 +0900183// use EnumFontFamilies or similar APIs to come up with a list of
184// fonts supporting the script and cache the result.
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900185// - Consider using UnicodeSet (or UnicodeMap) converted from
186// GLYPHSET (BMP) or directly read from truetype cmap tables to
187// keep track of which character is supported by which font
initial.commit3f4a7322008-07-27 06:49:38 +0900188// - Update script_font_cache in response to WM_FONTCHANGE
189
190const wchar_t* GetFontFamilyForScript(UScriptCode script,
191 GenericFamilyType generic) {
192 ScriptToFontMap::const_iterator iter = script_font_map->find(script);
193 const wchar_t* family = NULL;
194 if (iter != script_font_map->end()) {
195 family = iter->second;
196 }
197 return family;
198}
199
200// TODO(jungshik)
201// - Handle 'Inherited', 'Common' and 'Unknown'
202// (see http://www.unicode.org/reports/tr24/#Usage_Model )
203// For 'Inherited' and 'Common', perhaps we need to
204// accept another parameter indicating the previous family
205// and just return it.
206// - All the characters (or characters up to the point a single
207// font can cover) need to be taken into account
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900208const wchar_t* GetFallbackFamily(const wchar_t *characters,
initial.commit3f4a7322008-07-27 06:49:38 +0900209 int length,
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900210 GenericFamilyType generic,
211 UChar32 *char_checked,
212 UScriptCode *script_checked) {
initial.commit3f4a7322008-07-27 06:49:38 +0900213 DCHECK(characters && characters[0] && length > 0);
214 UScriptCode script = USCRIPT_COMMON;
215
216 // Sometimes characters common to script (e.g. space) is at
217 // the beginning of a string so that we need to skip them
218 // to get a font required to render the string.
219 int i = 0;
220 UChar32 ucs4 = 0;
221 while (i < length && script == USCRIPT_COMMON ||
222 script == USCRIPT_INVALID_CODE) {
223 U16_NEXT(characters, i, length, ucs4);
224 UErrorCode err = U_ZERO_ERROR;
225 script = uscript_getScript(ucs4, &err);
226 // silently ignore the error
227 }
228
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900229 // hack for full width ASCII. For the full-width ASCII, use the font
230 // for Han (which is locale-dependent).
initial.commit3f4a7322008-07-27 06:49:38 +0900231 if (0xFF00 < ucs4 && ucs4 < 0xFF5F)
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900232 script = USCRIPT_HAN;
initial.commit3f4a7322008-07-27 06:49:38 +0900233
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900234 // There are a lot of characters in USCRIPT_COMMON that can be covered
235 // by fonts for scripts closely related to them.
236 // See http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:]
237 // TODO(jungshik): make this more efficient with a wider coverage
238 // (Armenian, Georgian, Devanagari, etc)
239 if (script == USCRIPT_COMMON || script == USCRIPT_INHERITED) {
240 UBlockCode block = ublock_getCode(ucs4);
241 switch (block) {
242 case UBLOCK_BASIC_LATIN:
243 script = USCRIPT_LATIN;
244 break;
245 case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
246 script = USCRIPT_HAN;
247 break;
248 case UBLOCK_HIRAGANA:
249 case UBLOCK_KATAKANA:
250 script = USCRIPT_HIRAGANA;
251 case UBLOCK_ARABIC:
252 script = USCRIPT_ARABIC;
253 }
254 }
255
256 // Another lame work-around to cover non-BMP characters.
initial.commit3f4a7322008-07-27 06:49:38 +0900257 const wchar_t* family = GetFontFamilyForScript(script, generic);
258 if (!family) {
259 int plane = ucs4 >> 16;
260 switch (plane) {
261 case 1:
262 family = L"code2001";
263 break;
264 case 2:
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900265 family = L"simsun-extb";
initial.commit3f4a7322008-07-27 06:49:38 +0900266 break;
267 default:
268 family = L"arial unicode ms";
269 }
270 }
271
jungshik@google.com3ce34ed2008-08-08 10:36:12 +0900272 if (char_checked) *char_checked = ucs4;
273 if (script_checked) *script_checked = script;
initial.commit3f4a7322008-07-27 06:49:38 +0900274 return family;
275}
276
277
278
279// Be aware that this is not thread-safe.
280bool GetDerivedFontData(const wchar_t *family,
281 int style,
282 LOGFONT *logfont,
283 int *ascent,
284 HFONT *hfont,
285 SCRIPT_CACHE **script_cache) {
286 DCHECK(logfont && family && *family);
287 // Using |Singleton| here is not free, but the intl2 page cycler test
288 // does not show any noticeable difference with and without it. Leaking
289 // the contents of FontDataCache (especially SCRIPT_CACHE) at the end
290 // of a renderer process may not be a good idea. We may use
291 // atexit(). However, with no noticeable performance difference, |Singleton|
292 // is cleaner, I believe.
293 FontDataCache* font_data_cache =
294 Singleton<FontDataCache, FontDataCacheSingletonTraits>::get();
295 // TODO(jungshik) : This comes up pretty high in the profile so that
296 // we need to measure whether using SHA256 (after coercing all the
297 // fields to char*) is faster than StringPrintf.
298 std::wstring font_key = StringPrintf(L"%1d:%d:%s", style, logfont->lfHeight,
299 family);
300 FontDataCache::const_iterator iter = font_data_cache->find(font_key);
301 FontData *derived;
302 if (iter == font_data_cache->end()) {
303 DCHECK(wcslen(family) < LF_FACESIZE);
304 wcscpy_s(logfont->lfFaceName, LF_FACESIZE, family);
305 // TODO(jungshik): CreateFontIndirect always comes up with
306 // a font even if there's no font matching the name. Need to
307 // check it against what we actually want (as is done in FontCacheWin.cpp)
308 derived = new FontData;
309 derived->hfont = CreateFontIndirect(logfont);
310 // GetAscent may return kUndefinedAscent, but we still want to
311 // cache it so that we won't have to call CreateFontIndirect once
312 // more for HFONT next time.
313 derived->ascent = GetAscent(derived->hfont);
314 (*font_data_cache)[font_key] = derived;
315 } else {
316 derived = iter->second;
317 // Last time, GetAscent failed so that only HFONT was
318 // cached. Try once more assuming that TryPreloadFont
319 // was called by a caller between calls.
320 if (kUndefinedAscent == derived->ascent)
321 derived->ascent = GetAscent(derived->hfont);
322 }
323 *hfont = derived->hfont;
324 *ascent = derived->ascent;
325 *script_cache = &(derived->script_cache);
326 return *ascent != kUndefinedAscent;
327}
328
329int GetStyleFromLogfont(const LOGFONT* logfont) {
330 // TODO(jungshik) : consider defining UNDEFINED or INVALID for style and
331 // returning it when logfont is NULL
332 if (!logfont) {
333 NOTREACHED();
334 return FONT_STYLE_NORMAL;
335 }
336 return (logfont->lfItalic ? FONT_STYLE_ITALIC : FONT_STYLE_NORMAL) |
337 (logfont->lfUnderline ? FONT_STYLE_UNDERLINED : FONT_STYLE_NORMAL) |
338 (logfont->lfWeight >= 700 ? FONT_STYLE_BOLD : FONT_STYLE_NORMAL);
339}
340
341} // namespace gfx