#!/bin/bash
# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
| |
| |
# Remove display names for languages that are not listed in the accept-language
# list of Chromium.
#
# For every language in chrome_ui_languages.list the file
# ${langdatapath}/<lang>.txt is edited in place: the Keys, Types and Variants
# tables are deleted, and inside the Languages table only the entries whose
# code appears in accept_lang.list are kept.
#
# Globals:
#   langdatapath (read)
# Files read from the current directory:
#   accept_lang.list, chrome_ui_languages.list ('#' lines are ignored)
# Outputs:
#   Progress messages on stdout, errors on stderr.
# Returns:
#   1 if accept_lang.list yields no language or a sed invocation fails,
#   0 otherwise.
#
# NOTE(review): every pattern anchors on exactly one leading space; confirm
# that this matches the indentation used in the data files.
function filter_display_language_names {
  local -a accept_langs=() ui_langs=()
  local accept_pattern lang target
  local status=0

  # Split the lists on whitespace (as an unquoted $(...) would) but without
  # pathname expansion. read returns non-zero at end of input; that is
  # expected and deliberately ignored.
  read -r -d '' -a accept_langs < <(grep -v '^#' accept_lang.list)
  read -r -d '' -a ui_langs < <(grep -v '^#' chrome_ui_languages.list)

  # An empty list would produce the pattern "()[^a-z]", which matches lines
  # that have nothing to do with a language code.
  if (( ${#accept_langs[@]} == 0 )); then
    echo "accept_lang.list contains no languages; nothing filtered" >&2
    return 1
  fi

  # Join the codes with '|'; the IFS change is confined to the subshell.
  accept_pattern=$(IFS='|'; printf '%s' "${accept_langs[*]}")
  # The trailing [^a-z] stops a code from matching a longer code that merely
  # starts with it.
  accept_pattern="(${accept_pattern})[^a-z]"

  echo "Filtering out display names for non-A-L languages ${langdatapath}"
  for lang in "${ui_langs[@]}"; do
    target="${langdatapath}/${lang}.txt"
    if [[ ! -e "${target}" ]]; then
      echo "missing ${lang}"
      continue
    fi
    echo "Overwriting ${target} ..."
    # Inside the Languages range: print the opening line, the accepted
    # entries and the closing line explicitly, then delete everything so
    # nothing is printed twice by sed's auto-print.
    sed -r -i \
      -e '/^ Keys\{$/,/^ \}$/d' \
      -e '/^ Languages\{$/, /^ \}$/ {' \
      -e '/^ Languages\{$/p' \
      -e '/^ '"${accept_pattern}"'/p' \
      -e '/^ \}$/p' \
      -e 'd' \
      -e '}' \
      -e '/^ Types\{$/,/^ \}$/d' \
      -e '/^ Variants\{$/,/^ \}$/d' \
      -- "${target}" || status=1
  done
  return "${status}"
}
| |
| |
# Keep only the minimum locale data for non-UI languages.
#
# The "extra" languages are the lines of accept_lang.list that are neither
# comments nor matched by any code from chrome_ui_languages.list. For each of
# them two files are rewritten in place, when present:
#   ${localedatapath}/<lang>.txt - keeps the lines up to and including
#     "<lang>{", the %%ALIAS entry, (Aux)ExemplarCharacters, LocaleScript,
#     layout, Version and the closing "}" at column 0.
#   ${langdatapath}/<lang>.txt - keeps the lines up to and including
#     "<lang>{", the Languages table reduced to the language's own entry, and
#     the closing "}" at column 0.
#
# Globals:
#   localedatapath (read), langdatapath (read)
# Files read from the current directory:
#   accept_lang.list, chrome_ui_languages.list
# Outputs:
#   Progress messages and "missing <lang>" notices on stdout.
# Returns:
#   1 if a sed invocation fails, 0 otherwise.
#
# NOTE(review): the UI-language filter is not anchored, so an accept-language
# line that merely contains a UI language code (for example a regional
# variant) is treated as a UI language and left untouched. Confirm that this
# is intended before anchoring it.
# NOTE(review): the patterns anchor on exactly one leading space; confirm that
# this matches the indentation used in the data files.
function abridge_locale_data_for_non_ui_languages {
  local -a ui_langs=() extra_langs=()
  local ui_pattern lang target
  local status=0

  # Whitespace-split the list without pathname expansion; read's non-zero
  # status at end of input is expected.
  read -r -d '' -a ui_langs < <(grep -v '^#' chrome_ui_languages.list)
  # Join with '|'; the IFS change is confined to the subshell.
  ui_pattern=$(IFS='|'; printf '%s' "${ui_langs[*]}")

  read -r -d '' -a extra_langs \
    < <(grep -E -v -e '^#' -e "(${ui_pattern})" accept_lang.list)

  echo "Creating minimum locale data in ${localedatapath}"
  for lang in "${extra_langs[@]}"; do
    target="${localedatapath}/${lang}.txt"
    [[ -e "${target}" ]] || { echo "missing ${lang}"; continue; }
    echo "Overwriting ${target} ..."
    # -n: only the lines selected below survive.
    sed -n -r -i \
      -e '1, /^'"${lang}"'\{$/p' \
      -e '/^ "%%ALIAS"\{/p' \
      -e '/^ AuxExemplarCharacters\{.*\}$/p' \
      -e '/^ AuxExemplarCharacters\{$/, /^ \}$/p' \
      -e '/^ ExemplarCharacters\{.*\}$/p' \
      -e '/^ ExemplarCharacters\{$/, /^ \}$/p' \
      -e '/^ (LocaleScript|layout)\{$/, /^ \}$/p' \
      -e '/^ Version\{.*$/p' \
      -e '/^\}$/p' \
      -- "${target}" || status=1
  done

  echo "Creating minimum locale data in ${langdatapath}"
  for lang in "${extra_langs[@]}"; do
    target="${langdatapath}/${lang}.txt"
    [[ -e "${target}" ]] || { echo "missing ${lang}"; continue; }
    echo "Overwriting ${target} ..."
    sed -n -r -i \
      -e '1, /^'"${lang}"'\{$/p' \
      -e '/^ Languages\{$/, /^ \}$/ {' \
      -e '/^ Languages\{$/p' \
      -e '/^ '"${lang}"'\{.*\}$/p' \
      -e '/^ \}$/p' \
      -e '}' \
      -e '/^\}$/p' \
      -- "${target}" || status=1
  done
  return "${status}"
}
| |
# Drop historic currencies.
# TODO(jshin): Use ucurr_isAvailable in ICU to drop more currencies.
# See also http://en.wikipedia.org/wiki/List_of_circulating_currencies
#
# Every code listed in currencies_to_drop.list ('#' lines are ignored) has its
# table - from the line " <CODE>{" through the next line starting with " }" -
# deleted from each ${dataroot}/curr/*.txt except supplementalData.txt.
#
# Globals:
#   dataroot (read)
# Files read from the current directory:
#   currencies_to_drop.list
# Returns:
#   1 if a sed invocation fails, 0 otherwise (including when there is
#   nothing to drop).
#
# The working directory is left untouched: changing it here would invalidate
# a relative ${dataroot} for everything that runs afterwards.
#
# NOTE(review): the patterns anchor on exactly one leading space; confirm that
# this matches the indentation used in the data files.
function filter_currency_data {
  local -a currencies=()
  local droplist file
  local status=0

  # Whitespace-split the list without pathname expansion; read's non-zero
  # status at end of input is expected.
  read -r -d '' -a currencies < <(grep -v '^#' currencies_to_drop.list)

  # With no codes the pattern below would degenerate to "()\{".
  (( ${#currencies[@]} > 0 )) || return 0

  # Join with '|'; the IFS change is confined to the subshell.
  droplist=$(IFS='|'; printf '%s' "${currencies[*]}")
  droplist="(${droplist})\{"

  for file in "${dataroot}/curr"/*.txt; do
    # An unmatched glob stays literal; skip it instead of handing it to sed.
    [[ -e "${file}" ]] || continue
    [[ "${file##*/}" != 'supplementalData.txt' ]] || continue
    sed -r -i -e '/^ '"${droplist}"'/, /^ }/ d' -- "${file}" || status=1
  done
  return "${status}"
}
| |
# Remove the display names for numeric region codes we don't use, keeping
# 419 (Latin America).
#
# Deletes, in every ${dataroot}/region/*.txt, each line that contains three
# digits followed by "{" where the first digit is not 4. Note that this keeps
# every code starting with 4, not only 419.
#
# Globals:
#   dataroot (read)
# Returns:
#   1 if no region file is found, otherwise the exit status of sed.
#
# The working directory is left untouched so that a relative ${dataroot}
# remains valid for whatever runs afterwards.
function filter_region_data {
  local -a files=("${dataroot}/region"/*.txt)

  # An unmatched glob stays literal; refuse to hand that to sed.
  if [[ ! -e "${files[0]}" ]]; then
    echo "no region data found in ${dataroot}/region" >&2
    return 1
  fi

  sed -i -e '/[0-35-9][0-9][0-9]{/ d' -- "${files[@]}"
}
| |
| |
| |
# Strip the leading part of the zoneStrings table from the zone data.
#
# In every ${dataroot}/zone/*.txt except root.txt, the lines between a line
# starting with " zoneStrings" and the next line starting with ' "meta:' are
# deleted; those two boundary lines themselves are kept.
# NOTE(review): going by the function name, the deleted lines are presumably
# the per-zone exemplar-city entries - confirm against the data files.
#
# Globals:
#   dataroot (read)
# Returns:
#   1 if a sed invocation fails, 0 otherwise.
#
# The working directory is left untouched so that a relative ${dataroot}
# remains valid for whatever runs afterwards.
#
# NOTE(review): the patterns anchor on exactly one leading space; confirm that
# this matches the indentation used in the data files.
function remove_exemplar_cities {
  local file
  local status=0

  for file in "${dataroot}/zone"/*.txt; do
    # An unmatched glob stays literal; skip it instead of handing it to sed.
    [[ -e "${file}" ]] || continue
    [[ "${file##*/}" != 'root.txt' ]] || continue
    # Print the two boundary lines explicitly, then delete the whole range so
    # that auto-print does not emit anything from it a second time.
    sed -i \
      -e '/^ zoneStrings/, /^ "meta:/ {' \
      -e '/^ zoneStrings/ p' \
      -e '/^ "meta:/ p' \
      -e 'd' \
      -e '}' \
      -- "${file}" || status=1
  done
  return "${status}"
}
| |
# Keep only duration and compound in units* sections.
#
# Rewrites every ${dataroot}/locales/*.txt in place. Inside each units,
# unitsNarrow or unitsShort table only the opening line, the duration and
# compound sub-tables, and the closing line are kept.
#
# Globals:
#   dataroot (read)
# Outputs:
#   Progress messages on stdout.
# Returns:
#   1 if a sed invocation fails, 0 otherwise.
#
# NOTE(review): as written, the pattern that closes a duration/compound
# sub-table is textually identical to the one that closes the enclosing
# units table. If the data files indent nested tables more deeply, the
# leading whitespace of these patterns has to reflect that - confirm against
# the data files.
function filter_locale_data {
  local file
  local status=0

  for file in "${dataroot}/locales"/*.txt; do
    # An unmatched glob stays literal; skip it instead of handing it to sed.
    [[ -e "${file}" ]] || continue
    echo "Overwriting ${file} ..."
    # Print what is to be kept explicitly, then delete the whole range so
    # that auto-print does not emit the dropped lines.
    sed -r -i \
      -e '/^ units(|Narrow|Short)\{$/, /^ \}$/ {' \
      -e '/^ units(|Narrow|Short)\{$/ p' \
      -e '/^ (duration|compound)\{$/, /^ \}$/ p' \
      -e '/^ \}$/ p' \
      -e 'd' \
      -e '}' \
      -- "${file}" || status=1
  done
  return "${status}"
}
| |
# big5han and gb2312han collation do not make any sense and nobody uses them.
#
# Deletes the big5han and gb2312han tables - from the line " big5han{" or
# " gb2312han{" through the next line " }" - from ${dataroot}/coll/zh.txt,
# editing the file in place.
#
# Globals:
#   dataroot (read)
# Outputs:
#   Progress messages on stdout, errors on stderr.
# Returns:
#   1 if the file does not exist, otherwise the exit status of sed.
#
# NOTE(review): the patterns anchor on exactly one leading space; confirm that
# this matches the indentation used in the data file.
function remove_legacy_chinese_codepoint_collation {
  local zh_coll="${dataroot}/coll/zh.txt"

  echo "Removing Big5 / GB2312 collation data from Chinese locale"
  if [[ ! -e "${zh_coll}" ]]; then
    echo "missing ${zh_coll}" >&2
    return 1
  fi
  echo "Overwriting ${zh_coll}"
  sed -r -i -e '/^ (big5|gb2312)han\{$/,/^ \}$/ d' -- "${zh_coll}"
}
| |
# Resolve the data directory to an absolute path. The path is derived from
# the location of this script, and an absolute path stays valid even if the
# working directory changes while the filters below run.
dataroot="$(cd -- "$(dirname -- "$0")/../source/data" && pwd)" || {
  echo "cannot locate the source/data directory relative to $0" >&2
  exit 1
}
localedatapath="${dataroot}/locales"
langdatapath="${dataroot}/lang"

filter_display_language_names
abridge_locale_data_for_non_ui_languages
filter_currency_data
filter_region_data
remove_legacy_chinese_codepoint_collation
filter_locale_data

# Chromium OS needs exemplar cities for timezones, but Chromium does not.
# Removing them would save 400kB (uncompressed), but the size difference in
# the 7z-compressed installer is <= 100kB.
# TODO(jshin): Make separate data files for CrOS and Chromium.
#remove_exemplar_cities