initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1 | // Copyright 2008, Google Inc. |
| 2 | // All rights reserved. |
| 3 | // |
| 4 | // Redistribution and use in source and binary forms, with or without |
| 5 | // modification, are permitted provided that the following conditions are |
| 6 | // met: |
| 7 | // |
| 8 | // * Redistributions of source code must retain the above copyright |
| 9 | // notice, this list of conditions and the following disclaimer. |
| 10 | // * Redistributions in binary form must reproduce the above |
| 11 | // copyright notice, this list of conditions and the following disclaimer |
| 12 | // in the documentation and/or other materials provided with the |
| 13 | // distribution. |
| 14 | // * Neither the name of Google Inc. nor the names of its |
| 15 | // contributors may be used to endorse or promote products derived from |
| 16 | // this software without specific prior written permission. |
| 17 | // |
| 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 | |
| 30 | #include "base/gfx/font_utils.h" |
| 31 | |
| 32 | #include <limits> |
| 33 | #include <map> |
| 34 | |
| 35 | #include "base/gfx/uniscribe.h" |
| 36 | #include "base/logging.h" |
| 37 | #include "base/singleton.h" |
| 38 | #include "base/string_util.h" |
| 39 | #include "unicode/locid.h" |
jungshik@google.com | 3ce34ed | 2008-08-08 10:36:12 +0900 | [diff] [blame^] | 40 | #include "unicode/uchar.h" |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 41 | |
| 42 | namespace gfx { |
| 43 | |
| 44 | namespace { |
| 45 | |
// Maps an ICU script code to the name of the font family used for that
// script.
//
// hash_map has extra cost with no sizable gain for a small number of integer
// key items. When the map size becomes much bigger (which will happen later as
// more scripts are added) and this turns out to be prominent in the profile,
// we may consider switching to hash_map (or just an array if we support all
// the scripts).
typedef std::map<UScriptCode, const wchar_t*> ScriptToFontMap;
| 52 | |
| 53 | struct ScriptToFontMapSingletonTraits |
| 54 | : public DefaultSingletonTraits<ScriptToFontMap> { |
| 55 | static ScriptToFontMap* New() { |
| 56 | struct FontMap { |
| 57 | UScriptCode script; |
| 58 | const wchar_t* family; |
| 59 | }; |
| 60 | |
| 61 | const static FontMap font_map[] = { |
jungshik@google.com | 3ce34ed | 2008-08-08 10:36:12 +0900 | [diff] [blame^] | 62 | {USCRIPT_LATIN, L"times new roman"}, |
| 63 | {USCRIPT_GREEK, L"times new roman"}, |
| 64 | {USCRIPT_CYRILLIC, L"times new roman"}, |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 65 | {USCRIPT_SIMPLIFIED_HAN, L"simsun"}, |
jungshik@google.com | 3ce34ed | 2008-08-08 10:36:12 +0900 | [diff] [blame^] | 66 | //{USCRIPT_TRADITIONAL_HAN, L"pmingliu"}, |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 67 | {USCRIPT_HIRAGANA, L"ms pgothic"}, |
| 68 | {USCRIPT_KATAKANA, L"ms pgothic"}, |
| 69 | {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"}, |
| 70 | {USCRIPT_HANGUL, L"gulim"}, |
| 71 | {USCRIPT_THAI, L"tahoma"}, |
| 72 | {USCRIPT_HEBREW, L"david"}, |
jungshik@google.com | 3ce34ed | 2008-08-08 10:36:12 +0900 | [diff] [blame^] | 73 | {USCRIPT_ARABIC, L"tahoma"}, |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 74 | {USCRIPT_DEVANAGARI, L"mangal"}, |
| 75 | {USCRIPT_BENGALI, L"vrinda"}, |
| 76 | {USCRIPT_GURMUKHI, L"raavi"}, |
| 77 | {USCRIPT_GUJARATI, L"shruti"}, |
| 78 | {USCRIPT_ORIYA, L"kalinga"}, |
| 79 | {USCRIPT_TAMIL, L"latha"}, |
| 80 | {USCRIPT_TELUGU, L"gautami"}, |
| 81 | {USCRIPT_KANNADA, L"tunga"}, |
| 82 | {USCRIPT_MALAYALAM, L"kartika"}, |
| 83 | {USCRIPT_LAO, L"dokchampa"}, |
| 84 | {USCRIPT_TIBETAN, L"microsoft himalaya"}, |
| 85 | {USCRIPT_GEORGIAN, L"sylfaen"}, |
| 86 | {USCRIPT_ARMENIAN, L"sylfaen"}, |
| 87 | {USCRIPT_ETHIOPIC, L"nyala"}, |
| 88 | {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"}, |
| 89 | {USCRIPT_CHEROKEE, L"plantagenet cherokee"}, |
| 90 | {USCRIPT_YI, L"microsoft yi balti"}, |
| 91 | {USCRIPT_SINHALA, L"iskoola pota"}, |
| 92 | {USCRIPT_SYRIAC, L"estrangelo edessa"}, |
| 93 | {USCRIPT_KHMER, L"daunpenh"}, |
| 94 | {USCRIPT_THAANA, L"mv boli"}, |
| 95 | {USCRIPT_MONGOLIAN, L"mongolian balti"}, |
| 96 | // For common, perhaps we should return a font |
| 97 | // for the current application/system locale. |
| 98 | //{USCRIPT_COMMON, L"times new roman"} |
| 99 | }; |
| 100 | |
| 101 | ScriptToFontMap* new_instance = new ScriptToFontMap; |
| 102 | // Cannot recover from OOM so that there's no need to check. |
| 103 | for (int i = 0; i < arraysize(font_map); ++i) |
| 104 | (*new_instance)[font_map[i].script] = font_map[i].family; |
| 105 | |
| 106 | // Initialize the locale-dependent mapping. |
| 107 | // Since Chrome synchronizes the ICU default locale with its UI locale, |
| 108 | // this ICU locale tells the current UI locale of Chrome. |
| 109 | Locale locale = Locale::getDefault(); |
| 110 | ScriptToFontMap::const_iterator iter; |
jungshik@google.com | 3ce34ed | 2008-08-08 10:36:12 +0900 | [diff] [blame^] | 111 | if (locale == Locale::getJapanese()) { |
| 112 | iter = new_instance->find(USCRIPT_HIRAGANA); |
| 113 | } else if (locale == Locale::getKorean()) { |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 114 | iter = new_instance->find(USCRIPT_HANGUL); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 115 | } else { |
jungshik@google.com | 3ce34ed | 2008-08-08 10:36:12 +0900 | [diff] [blame^] | 116 | // Use Simplified Chinese font for all other locales including |
| 117 | // Traditional Chinese because Simsun (SC font) has a wider |
| 118 | // coverage (covering both SC and TC) than PMingLiu (TC font). |
| 119 | // This also speeds up the TC version of Chrome when rendering SC pages. |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 120 | iter = new_instance->find(USCRIPT_SIMPLIFIED_HAN); |
| 121 | } |
| 122 | if (iter != new_instance->end()) |
| 123 | (*new_instance)[USCRIPT_HAN] = iter->second; |
| 124 | |
| 125 | return new_instance; |
| 126 | } |
| 127 | }; |
| 128 | |
// Handle through which the functions below reach the ScriptToFontMap
// singleton (via operator->). The table contents presumably come from
// ScriptToFontMapSingletonTraits::New() -- confirm against base/singleton.h.
Singleton<ScriptToFontMap, ScriptToFontMapSingletonTraits> script_font_map;
| 130 | |
| 131 | const int kUndefinedAscent = std::numeric_limits<int>::min(); |
| 132 | |
| 133 | // Given an HFONT, return the ascent. If GetTextMetrics fails, |
| 134 | // kUndefinedAscent is returned, instead. |
| 135 | int GetAscent(HFONT hfont) { |
| 136 | HDC dc = GetDC(NULL); |
| 137 | HGDIOBJ oldFont = SelectObject(dc, hfont); |
| 138 | TEXTMETRIC tm; |
| 139 | BOOL got_metrics = GetTextMetrics(dc, &tm); |
| 140 | SelectObject(dc, oldFont); |
| 141 | ReleaseDC(NULL, dc); |
| 142 | return got_metrics ? tm.tmAscent : kUndefinedAscent; |
| 143 | } |
| 144 | |
| 145 | struct FontData { |
| 146 | FontData() : hfont(NULL), ascent(kUndefinedAscent), script_cache(NULL) {} |
| 147 | HFONT hfont; |
| 148 | int ascent; |
| 149 | mutable SCRIPT_CACHE script_cache; |
| 150 | }; |
| 151 | |
// Maps a font key string (built in GetDerivedFontData from the style, the
// LOGFONT height and the family name) to its heap-allocated FontData.
//
// Again, using hash_map does not earn us much here.
// page_cycler_test intl2 gave us a 'better' result with map than with hash_map
// even though they're well within 1-sigma of each other, so the difference
// is not significant. On the other hand, some pages in intl2 seem to
// take longer to load with map in the 1st pass. Need to experiment further.
typedef std::map<std::wstring, FontData*> FontDataCache;
| 158 | struct FontDataCacheSingletonTraits |
| 159 | : public DefaultSingletonTraits<FontDataCache> { |
| 160 | static void Delete(FontDataCache* cache) { |
| 161 | FontDataCache::iterator iter = cache->begin(); |
| 162 | while (iter != cache->end()) { |
| 163 | SCRIPT_CACHE script_cache = iter->second->script_cache; |
| 164 | if (script_cache) |
| 165 | ScriptFreeCache(&script_cache); |
| 166 | delete iter->second; |
| 167 | ++iter; |
| 168 | } |
| 169 | delete cache; |
| 170 | } |
| 171 | }; |
| 172 | |
| 173 | } // namespace |
| 174 | |
| 175 | // TODO(jungshik) : this is font fallback code version 0.1 |
| 176 | // - Cover all the scripts |
| 177 | // - Get the default font for each script/generic family from the |
| 178 | // preference instead of hardcoding in the source. |
jungshik@google.com | 3ce34ed | 2008-08-08 10:36:12 +0900 | [diff] [blame^] | 179 | // (at least, read values from the registry for IE font settings). |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 180 | // - Support generic families (from FontDescription) |
| 181 | // - If the default font for a script is not available, |
jungshik@google.com | 3ce34ed | 2008-08-08 10:36:12 +0900 | [diff] [blame^] | 182 | // try some more fonts known to support it. Finally, we can |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 183 | // use EnumFontFamilies or similar APIs to come up with a list of |
| 184 | // fonts supporting the script and cache the result. |
jungshik@google.com | 3ce34ed | 2008-08-08 10:36:12 +0900 | [diff] [blame^] | 185 | // - Consider using UnicodeSet (or UnicodeMap) converted from |
| 186 | // GLYPHSET (BMP) or directly read from truetype cmap tables to |
| 187 | // keep track of which character is supported by which font |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 188 | // - Update script_font_cache in response to WM_FONTCHANGE |
| 189 | |
| 190 | const wchar_t* GetFontFamilyForScript(UScriptCode script, |
| 191 | GenericFamilyType generic) { |
| 192 | ScriptToFontMap::const_iterator iter = script_font_map->find(script); |
| 193 | const wchar_t* family = NULL; |
| 194 | if (iter != script_font_map->end()) { |
| 195 | family = iter->second; |
| 196 | } |
| 197 | return family; |
| 198 | } |
| 199 | |
| 200 | // TODO(jungshik) |
| 201 | // - Handle 'Inherited', 'Common' and 'Unknown' |
| 202 | // (see http://www.unicode.org/reports/tr24/#Usage_Model ) |
| 203 | // For 'Inherited' and 'Common', perhaps we need to |
| 204 | // accept another parameter indicating the previous family |
| 205 | // and just return it. |
| 206 | // - All the characters (or characters up to the point a single |
| 207 | // font can cover) need to be taken into account |
jungshik@google.com | 3ce34ed | 2008-08-08 10:36:12 +0900 | [diff] [blame^] | 208 | const wchar_t* GetFallbackFamily(const wchar_t *characters, |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 209 | int length, |
jungshik@google.com | 3ce34ed | 2008-08-08 10:36:12 +0900 | [diff] [blame^] | 210 | GenericFamilyType generic, |
| 211 | UChar32 *char_checked, |
| 212 | UScriptCode *script_checked) { |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 213 | DCHECK(characters && characters[0] && length > 0); |
| 214 | UScriptCode script = USCRIPT_COMMON; |
| 215 | |
| 216 | // Sometimes characters common to script (e.g. space) is at |
| 217 | // the beginning of a string so that we need to skip them |
| 218 | // to get a font required to render the string. |
| 219 | int i = 0; |
| 220 | UChar32 ucs4 = 0; |
| 221 | while (i < length && script == USCRIPT_COMMON || |
| 222 | script == USCRIPT_INVALID_CODE) { |
| 223 | U16_NEXT(characters, i, length, ucs4); |
| 224 | UErrorCode err = U_ZERO_ERROR; |
| 225 | script = uscript_getScript(ucs4, &err); |
| 226 | // silently ignore the error |
| 227 | } |
| 228 | |
jungshik@google.com | 3ce34ed | 2008-08-08 10:36:12 +0900 | [diff] [blame^] | 229 | // hack for full width ASCII. For the full-width ASCII, use the font |
| 230 | // for Han (which is locale-dependent). |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 231 | if (0xFF00 < ucs4 && ucs4 < 0xFF5F) |
jungshik@google.com | 3ce34ed | 2008-08-08 10:36:12 +0900 | [diff] [blame^] | 232 | script = USCRIPT_HAN; |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 233 | |
jungshik@google.com | 3ce34ed | 2008-08-08 10:36:12 +0900 | [diff] [blame^] | 234 | // There are a lot of characters in USCRIPT_COMMON that can be covered |
| 235 | // by fonts for scripts closely related to them. |
| 236 | // See http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:] |
| 237 | // TODO(jungshik): make this more efficient with a wider coverage |
| 238 | // (Armenian, Georgian, Devanagari, etc) |
| 239 | if (script == USCRIPT_COMMON || script == USCRIPT_INHERITED) { |
| 240 | UBlockCode block = ublock_getCode(ucs4); |
| 241 | switch (block) { |
| 242 | case UBLOCK_BASIC_LATIN: |
| 243 | script = USCRIPT_LATIN; |
| 244 | break; |
| 245 | case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: |
| 246 | script = USCRIPT_HAN; |
| 247 | break; |
| 248 | case UBLOCK_HIRAGANA: |
| 249 | case UBLOCK_KATAKANA: |
| 250 | script = USCRIPT_HIRAGANA; |
| 251 | case UBLOCK_ARABIC: |
| 252 | script = USCRIPT_ARABIC; |
| 253 | } |
| 254 | } |
| 255 | |
| 256 | // Another lame work-around to cover non-BMP characters. |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 257 | const wchar_t* family = GetFontFamilyForScript(script, generic); |
| 258 | if (!family) { |
| 259 | int plane = ucs4 >> 16; |
| 260 | switch (plane) { |
| 261 | case 1: |
| 262 | family = L"code2001"; |
| 263 | break; |
| 264 | case 2: |
jungshik@google.com | 3ce34ed | 2008-08-08 10:36:12 +0900 | [diff] [blame^] | 265 | family = L"simsun-extb"; |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 266 | break; |
| 267 | default: |
| 268 | family = L"arial unicode ms"; |
| 269 | } |
| 270 | } |
| 271 | |
jungshik@google.com | 3ce34ed | 2008-08-08 10:36:12 +0900 | [diff] [blame^] | 272 | if (char_checked) *char_checked = ucs4; |
| 273 | if (script_checked) *script_checked = script; |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 274 | return family; |
| 275 | } |
| 276 | |
| 277 | |
| 278 | |
| 279 | // Be aware that this is not thread-safe. |
| 280 | bool GetDerivedFontData(const wchar_t *family, |
| 281 | int style, |
| 282 | LOGFONT *logfont, |
| 283 | int *ascent, |
| 284 | HFONT *hfont, |
| 285 | SCRIPT_CACHE **script_cache) { |
| 286 | DCHECK(logfont && family && *family); |
| 287 | // Using |Singleton| here is not free, but the intl2 page cycler test |
| 288 | // does not show any noticeable difference with and without it. Leaking |
| 289 | // the contents of FontDataCache (especially SCRIPT_CACHE) at the end |
| 290 | // of a renderer process may not be a good idea. We may use |
| 291 | // atexit(). However, with no noticeable performance difference, |Singleton| |
| 292 | // is cleaner, I believe. |
| 293 | FontDataCache* font_data_cache = |
| 294 | Singleton<FontDataCache, FontDataCacheSingletonTraits>::get(); |
| 295 | // TODO(jungshik) : This comes up pretty high in the profile so that |
| 296 | // we need to measure whether using SHA256 (after coercing all the |
| 297 | // fields to char*) is faster than StringPrintf. |
| 298 | std::wstring font_key = StringPrintf(L"%1d:%d:%s", style, logfont->lfHeight, |
| 299 | family); |
| 300 | FontDataCache::const_iterator iter = font_data_cache->find(font_key); |
| 301 | FontData *derived; |
| 302 | if (iter == font_data_cache->end()) { |
| 303 | DCHECK(wcslen(family) < LF_FACESIZE); |
| 304 | wcscpy_s(logfont->lfFaceName, LF_FACESIZE, family); |
| 305 | // TODO(jungshik): CreateFontIndirect always comes up with |
| 306 | // a font even if there's no font matching the name. Need to |
| 307 | // check it against what we actually want (as is done in FontCacheWin.cpp) |
| 308 | derived = new FontData; |
| 309 | derived->hfont = CreateFontIndirect(logfont); |
| 310 | // GetAscent may return kUndefinedAscent, but we still want to |
| 311 | // cache it so that we won't have to call CreateFontIndirect once |
| 312 | // more for HFONT next time. |
| 313 | derived->ascent = GetAscent(derived->hfont); |
| 314 | (*font_data_cache)[font_key] = derived; |
| 315 | } else { |
| 316 | derived = iter->second; |
| 317 | // Last time, GetAscent failed so that only HFONT was |
| 318 | // cached. Try once more assuming that TryPreloadFont |
| 319 | // was called by a caller between calls. |
| 320 | if (kUndefinedAscent == derived->ascent) |
| 321 | derived->ascent = GetAscent(derived->hfont); |
| 322 | } |
| 323 | *hfont = derived->hfont; |
| 324 | *ascent = derived->ascent; |
| 325 | *script_cache = &(derived->script_cache); |
| 326 | return *ascent != kUndefinedAscent; |
| 327 | } |
| 328 | |
| 329 | int GetStyleFromLogfont(const LOGFONT* logfont) { |
| 330 | // TODO(jungshik) : consider defining UNDEFINED or INVALID for style and |
| 331 | // returning it when logfont is NULL |
| 332 | if (!logfont) { |
| 333 | NOTREACHED(); |
| 334 | return FONT_STYLE_NORMAL; |
| 335 | } |
| 336 | return (logfont->lfItalic ? FONT_STYLE_ITALIC : FONT_STYLE_NORMAL) | |
| 337 | (logfont->lfUnderline ? FONT_STYLE_UNDERLINED : FONT_STYLE_NORMAL) | |
| 338 | (logfont->lfWeight >= 700 ? FONT_STYLE_BOLD : FONT_STYLE_NORMAL); |
| 339 | } |
| 340 | |
| 341 | } // namespace gfx |