#!/usr/bin/env bash

# Copyright 2020 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Fail fast on any error.
set -e

# Every input and output path used by this script is derived from
# ANDROID_BUILD_TOP, so refuse to run without it. The expansion is quoted (and
# defaulted) so the test is well-formed whether the variable is unset, empty or
# contains whitespace.
if [[ -z "${ANDROID_BUILD_TOP:-}" ]]; then
  echo \$ANDROID_BUILD_TOP must be set.
  exit 1
fi

# Source-tree locations used by the pipeline, all derived from
# ANDROID_BUILD_TOP. The two repository roots are canonicalized with realpath;
# its argument is quoted so a checkout path containing whitespace is passed as
# a single operand.
SYSTEM_TIMEZONE_DIR=$(realpath "${ANDROID_BUILD_TOP}/system/timezone")
GEOTZ_DIR=$(realpath "${ANDROID_BUILD_TOP}/packages/modules/GeoTZ")

# Inputs, scratch space and final output all live under the GeoTZ module.
DATA_PIPELINE_DIR="${GEOTZ_DIR}/data_pipeline"
TZBB_DATA_DIR="${GEOTZ_DIR}/tzbb_data"
WORKING_DIR_ROOT="${DATA_PIPELINE_DIR}/working_dir"
OUTPUT_DATA_DIR="${GEOTZ_DIR}/output_data/odbl"

#
# Development / debugging knobs. The values below are the defaults for a full
# run; edit them locally to shorten or resume a run.
#

# S2 level passed to the sampling steps. A lower level speeds up S2 sampling.
S2_LEVEL=12

# 1 = skip the initial build of the step commands (use when no code changes are
# being made); any other value builds them first.
SKIP_BUILD=0

# Number of the first step to execute: a step runs only when SKIP_TO_STEP is
# less than or equal to its own number, so 0 runs everything. Use it together
# with ALLOW_WORKING_DIR_ROOT_EXISTS=1 to resume a failed run. Most steps
# discover what to do by inspecting the names of files, so be careful not to
# skip to a step whose input data is not already available.
SKIP_TO_STEP=0

# 1 = carry on when the working dir already exists; 0 = halt instead.
ALLOW_WORKING_DIR_ROOT_EXISTS=0

# Only process a subset of zone IDs from the geojson input file.
# Must be comma separated, no spaces. Empty means no restriction is requested.
# Do not use with SKIP_TO_STEP > 1 as it will have no effect.
# Be careful with this and ALLOW_WORKING_DIR_ROOT_EXISTS=1 as that can leave
# STEP1 output files from previous runs, which will then be processed by STEP2.
STEP1_RESTRICT_TO_ZONES=


# Refuse to reuse a working dir left behind by an earlier run unless
# ALLOW_WORKING_DIR_ROOT_EXISTS has been changed from 0.
# [[ ]] with a quoted path is used because an unquoted `[ -d $VAR ]` evaluates
# to true when VAR is empty and is malformed when VAR contains whitespace.
if [[ -d "${WORKING_DIR_ROOT}" ]]; then
  echo "Working dir ${WORKING_DIR_ROOT} exists..."
  if (( ${ALLOW_WORKING_DIR_ROOT_EXISTS} == 0 )); then
    echo Halting...
    echo "Remove or move ${WORKING_DIR_ROOT} and try again."
    exit 1
  fi
fi

# Highest step number implemented below; asking to skip past it is an error.
MAX_STEP_ID=7
if (( ${SKIP_TO_STEP} > ${MAX_STEP_ID} )); then
  echo "Cannot skip to step ${SKIP_TO_STEP}"
  exit 1
fi

# Extra argument appended to every step command.
# NOTE(review): presumably forwarded to the JVM as -Xmx32G by the tool
# launchers - confirm.
JAVA_ARGS="-J-Xmx32G"

# Per-step settings. STEPn_TARGET is the build target name and STEPn_CMD is the
# command line prefix ("<target> <java args>") that the step later expands
# unquoted so that it splits into separate words.

# Step 1
STEP1_TARGET="geotz_geojsontz_to_tzs2polygons"
STEP1_CMD="${STEP1_TARGET} ${JAVA_ARGS}"
STEP1_THREAD_COUNT=10
STEP1_WORKING_DIR="${WORKING_DIR_ROOT}/tzs2polygons"

# Step 2
# The variable name STEP2_REPLACEMENT_THREADHOLD is misspelt but must stay as
# is: step 2 reads it under exactly this name.
STEP2_TARGET="geotz_canonicalize_tzs2polygons"
STEP2_CMD="${STEP2_TARGET} ${JAVA_ARGS}"
STEP2_TZIDS_FILE="${SYSTEM_TIMEZONE_DIR}/output_data/android/tzids.prototxt"
STEP2_REPLACEMENT_THREADHOLD="2020-01-01T00:00:00.00Z"
STEP2_WORKING_DIR="${WORKING_DIR_ROOT}/canonicalized_tzs2polygons"

# Step 3
STEP3_TARGET="geotz_tzs2polygons_to_tzs2cellunions"
STEP3_CMD="${STEP3_TARGET} ${JAVA_ARGS}"
STEP3_THREAD_COUNT=5
STEP3_WORKING_DIR="${WORKING_DIR_ROOT}/tzs2cellunions_l${S2_LEVEL}"

# Step 4
STEP4_TARGET="geotz_tzs2cellunions_to_tzs2ranges"
STEP4_CMD="${STEP4_TARGET} ${JAVA_ARGS}"
STEP4_THREAD_COUNT=10
STEP4_WORKING_DIR="${WORKING_DIR_ROOT}/tzs2ranges_l${S2_LEVEL}"

# Step 5
STEP5_TARGET="geotz_mergetzs2ranges"
STEP5_CMD="${STEP5_TARGET} ${JAVA_ARGS}"
STEP5_THREAD_COUNT=5
STEP5_WORKING_DIR="${WORKING_DIR_ROOT}/mergedtzs2ranges_l${S2_LEVEL}"
STEP5_OUTPUT_FILE="${STEP5_WORKING_DIR}/mergedtzs2ranges${S2_LEVEL}.prototxt"

# Step 6
STEP6_TARGET="geotz_createtzs2fileinput"
STEP6_CMD="${STEP6_TARGET} ${JAVA_ARGS}"
STEP6_OUTPUT_FILE="${WORKING_DIR_ROOT}/tzs2fileinput/tzs2fileinput${S2_LEVEL}.prototxt"

# Step 7
STEP7_TARGET="geotz_createtzs2file"
STEP7_CMD="${STEP7_TARGET} ${JAVA_ARGS}"
STEP7_OUTPUT_FILE="${OUTPUT_DATA_DIR}/tzs2.dat"

# Every target handed to the build in one invocation.
BUILD_TARGETS=(
  "${STEP1_TARGET}"
  "${STEP2_TARGET}"
  "${STEP3_TARGET}"
  "${STEP4_TARGET}"
  "${STEP5_TARGET}"
  "${STEP6_TARGET}"
  "${STEP7_TARGET}"
)

echo "${0} starting at $(date --iso-8601=seconds)"

# All logs and intermediate data go under the working dir.
mkdir -p "${WORKING_DIR_ROOT}"

# Build all step commands
if (( ${SKIP_BUILD} == 1 )); then
  echo Skipping build step...
else
  # The command is held in an array so the script path stays a single word even
  # if ANDROID_BUILD_TOP contains whitespace.
  BUILD_CMD=("${ANDROID_BUILD_TOP}/build/soong/soong_ui.bash" --make-mode -j30)
  LOG_FILE="${WORKING_DIR_ROOT}/build.log"
  echo "Building step commands. Logging to ${LOG_FILE} ..."
  {
    "${BUILD_CMD[@]}" "${BUILD_TARGETS[@]}"
  } &> "${LOG_FILE}"
fi

# Step 0: Preparation, unpack the geojson file into the working dir.
ZIPPED_BOUNDARY_FILE="${TZBB_DATA_DIR}/timezones.geojson.zip"
echo Starting step 0
if (( ${SKIP_TO_STEP} <= 0 )); then
  # -p: dist/ may already be present when the script is re-run against an
  # existing working dir (ALLOW_WORKING_DIR_ROOT_EXISTS=1). A plain mkdir would
  # fail there and, with set -e in force, abort the run before unzip -o gets a
  # chance to overwrite the old contents.
  mkdir -p "${WORKING_DIR_ROOT}/dist"
  echo "Unpacking ${ZIPPED_BOUNDARY_FILE} to ${WORKING_DIR_ROOT}/dist/..."
  unzip -o "${ZIPPED_BOUNDARY_FILE}" -d "${WORKING_DIR_ROOT}/dist/"

  # Ensure there's a LICENSE next to the boundary file. It will be copied
  # alongside data by subsequent steps.
  echo "Copying LICENSE file to ${WORKING_DIR_ROOT}/dist/"
  cp "${TZBB_DATA_DIR}/LICENSE" "${WORKING_DIR_ROOT}/dist/"
else
  echo Skipping...
fi
echo Completed step 0

# Step 1 reads this file, so stop here if it is missing (for example when
# step 0 was skipped and no earlier run unpacked it).
UNZIPPED_BOUNDARY_FILE="${WORKING_DIR_ROOT}/dist/combined.json"
if [[ ! -f "${UNZIPPED_BOUNDARY_FILE}" ]]; then
  echo "${UNZIPPED_BOUNDARY_FILE} not found"
  exit 1
fi

# Step 1: run STEP1_CMD over the unpacked geojson boundary file, writing into
# STEP1_WORKING_DIR. All tool output (stdout and stderr) goes to step1.log.
echo Starting step 1
if (( ${SKIP_TO_STEP} <= 1 )); then
  mkdir -p "${STEP1_WORKING_DIR}"
  LOG_FILE="${WORKING_DIR_ROOT}/step1.log"
  echo "Logging to ${LOG_FILE} ..."
  {
    # STEP1_CMD is deliberately unquoted: it holds "<target> <java args>" and
    # has to split into separate words.
    ${STEP1_CMD} \
      --geo-json "${UNZIPPED_BOUNDARY_FILE}" \
      --num-threads "${STEP1_THREAD_COUNT}" \
      --output "${STEP1_WORKING_DIR}" \
      --tz-ids "${STEP1_RESTRICT_TO_ZONES}"
  } &> "${LOG_FILE}"
else
  echo Skipping...
fi
echo Completed step 1

# Step 2: run STEP2_CMD over the step 1 output dir together with the tzids
# file and the replacement threshold, writing into STEP2_WORKING_DIR. All tool
# output (stdout and stderr) goes to step2.log.
echo Starting step 2
if (( ${SKIP_TO_STEP} <= 2 )); then
  mkdir -p "${STEP2_WORKING_DIR}"
  LOG_FILE="${WORKING_DIR_ROOT}/step2.log"
  echo "Logging to ${LOG_FILE} ..."
  {
    # STEP2_CMD is deliberately unquoted: it holds "<target> <java args>" and
    # has to split into separate words.
    # STEP2_REPLACEMENT_THREADHOLD (sic) is the name the value is defined under.
    ${STEP2_CMD} \
      --input "${STEP1_WORKING_DIR}" \
      --tz-ids "${STEP2_TZIDS_FILE}" \
      --replacement-threshold "${STEP2_REPLACEMENT_THREADHOLD}" \
      --output "${STEP2_WORKING_DIR}"
  } &> "${LOG_FILE}"
else
  echo Skipping...
fi
echo Completed step 2

# Step 3: run STEP3_CMD over the step 2 output dir with S2_LEVEL as the maximum
# S2 level, writing into STEP3_WORKING_DIR. All tool output (stdout and stderr)
# goes to step3.log.
echo Starting step 3
if (( ${SKIP_TO_STEP} <= 3 )); then
  mkdir -p "${STEP3_WORKING_DIR}"
  LOG_FILE="${WORKING_DIR_ROOT}/step3.log"
  echo "Logging to ${LOG_FILE} ..."
  {
    # STEP3_CMD is deliberately unquoted: it holds "<target> <java args>" and
    # has to split into separate words.
    ${STEP3_CMD} \
      --input "${STEP2_WORKING_DIR}" \
      --num-threads "${STEP3_THREAD_COUNT}" \
      --output "${STEP3_WORKING_DIR}" \
      --max-s2-level "${S2_LEVEL}"
  } &> "${LOG_FILE}"
else
  echo Skipping...
fi
echo Completed step 3

# Step 4: run STEP4_CMD over the step 3 output dir at S2_LEVEL, writing into
# STEP4_WORKING_DIR. All tool output (stdout and stderr) goes to step4.log.
echo Starting step 4
if (( ${SKIP_TO_STEP} <= 4 )); then
  mkdir -p "${STEP4_WORKING_DIR}"
  LOG_FILE="${WORKING_DIR_ROOT}/step4.log"
  echo "Logging to ${LOG_FILE} ..."
  {
    # STEP4_CMD is deliberately unquoted: it holds "<target> <java args>" and
    # has to split into separate words.
    ${STEP4_CMD} \
      --input "${STEP3_WORKING_DIR}" \
      --num-threads "${STEP4_THREAD_COUNT}" \
      --output "${STEP4_WORKING_DIR}" \
      --s2-level "${S2_LEVEL}"
  } &> "${LOG_FILE}"
else
  echo Skipping...
fi
echo Completed step 4

# Step 5: run STEP5_CMD over the step 4 output dir, using STEP5_WORKING_DIR as
# its working dir and producing STEP5_OUTPUT_FILE. All tool output (stdout and
# stderr) goes to step5.log.
echo Starting step 5
if (( ${SKIP_TO_STEP} <= 5 )); then
  mkdir -p "${STEP5_WORKING_DIR}"
  LOG_FILE="${WORKING_DIR_ROOT}/step5.log"
  echo "Logging to ${LOG_FILE} ..."
  {
    # STEP5_CMD is deliberately unquoted: it holds "<target> <java args>" and
    # has to split into separate words.
    ${STEP5_CMD} \
      --input "${STEP4_WORKING_DIR}" \
      --num-threads "${STEP5_THREAD_COUNT}" \
      --working-dir "${STEP5_WORKING_DIR}" \
      --output-file "${STEP5_OUTPUT_FILE}"
  } &> "${LOG_FILE}"
else
  echo Skipping...
fi
echo Completed step 5

# Step 6: run STEP6_CMD on the step 5 output file to produce
# STEP6_OUTPUT_FILE. All tool output (stdout and stderr) goes to step6.log.
echo Starting step 6
if (( ${SKIP_TO_STEP} <= 6 )); then
  LOG_FILE="${WORKING_DIR_ROOT}/step6.log"
  echo "Logging to ${LOG_FILE} ..."
  # Nothing else in this script creates the output file's directory, so create
  # it here the same way step 7 does for its own output. Harmless if the tool
  # would also create it.
  mkdir -p "$(dirname "${STEP6_OUTPUT_FILE}")"
  {
    # STEP6_CMD is deliberately unquoted: it holds "<target> <java args>" and
    # has to split into separate words.
    ${STEP6_CMD} \
      --input-file "${STEP5_OUTPUT_FILE}" \
      --output-file "${STEP6_OUTPUT_FILE}"
  # &> captures both streams like steps 1-5 do. The previous `2>&1 > file`
  # duplicated stderr onto the terminal *before* stdout was redirected, so
  # error output never reached the log.
  } &> "${LOG_FILE}"
else
  echo Skipping...
fi
echo Completed step 6

# Step 7: run STEP7_CMD on the step 6 output file at S2_LEVEL to produce
# STEP7_OUTPUT_FILE. All tool output (stdout and stderr) goes to step7.log.
echo Starting step 7
if (( ${SKIP_TO_STEP} <= 7 )); then
  LOG_FILE="${WORKING_DIR_ROOT}/step7.log"
  echo "Logging to ${LOG_FILE} ..."
  # The final output lives outside the working dir; make sure its dir exists.
  mkdir -p "$(dirname "${STEP7_OUTPUT_FILE}")"
  {
    # STEP7_CMD is deliberately unquoted: it holds "<target> <java args>" and
    # has to split into separate words.
    ${STEP7_CMD} \
      --input-file "${STEP6_OUTPUT_FILE}" \
      --s2-level "${S2_LEVEL}" \
      --output-file "${STEP7_OUTPUT_FILE}"
  # &> captures both streams like steps 1-5 do. The previous `2>&1 > file`
  # duplicated stderr onto the terminal *before* stdout was redirected, so
  # error output never reached the log.
  } &> "${LOG_FILE}"
else
  echo Skipping...
fi
echo Completed step 7

# Tell the user where the result is and when the run finished. The expansions
# are quoted so the path is printed verbatim rather than word-split or globbed.
echo "Output files can be found in ${OUTPUT_DATA_DIR}"

echo "${0} completed at $(date --iso-8601=seconds)"