blob: 76d63aae26a305385d681149359a580401054f48 [file] [log] [blame]
Tobias Thierer8d5f1162017-03-21 16:16:57 +11001#!/usr/bin/env python
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Helps compare openjdk_java_files contents against upstream file contents.
18
Outputs a tab-separated table comparing each openjdk_java_files entry
against OpenJDK upstreams. This can help verify updates to later upstreams
or focus attention towards files that may have been missed in a previous
update (http://b/36461944) or are otherwise surprising (http://b/36429512).

 - Identifies each file as identical to, different from or missing from
   each upstream; diffs are not produced.
 - Optionally, copies all openjdk_java_files from the default upstream
   (e.g. OpenJDK8u121-b13) to a new directory, for easy directory comparison
   using e.g. kdiff3, which allows inspecting detailed diffs.
 - The ANDROID_BUILD_TOP environment variable must be set to point to the
   AOSP root directory (parent of libcore).
 - Run with -h command line argument to get usage instructions.

To check out upstreams OpenJDK 7u40, 8u60 and 8u121-b13, run:

mkdir openjdk
cd openjdk
hg clone http://hg.openjdk.java.net/jdk7u/jdk7u40/ 7u40
(cd !$ ; sh get_source.sh)
hg clone http://hg.openjdk.java.net/jdk8u/jdk8u 8u121-b13
(cd !$ ; hg update -r jdk8u121-b13 && sh get_source.sh)
hg clone http://hg.openjdk.java.net/jdk8u/jdk8u60/ 8u60
(cd !$ ; sh get_source.sh)

The newly created openjdk directory is then a suitable argument for the
--upstream_root parameter.
46"""
47
48import argparse
Tobias Thierer976ab372017-03-22 15:30:18 +110049import csv
Tobias Thierer8d5f1162017-03-21 16:16:57 +110050import filecmp
51import os
52import re
53import shutil
Tobias Thierer976ab372017-03-22 15:30:18 +110054import sys
Tobias Thierer8d5f1162017-03-21 16:16:57 +110055
def rel_paths_from_makefile(build_top):
    """Returns the list of relative paths to .java files parsed from openjdk_java_files.mk

    Args:
        build_top: path of the AOSP root directory; the makefile is read from
            <build_top>/libcore/openjdk_java_files.mk.

    Returns:
        A list with one entry per matching makefile line, in file order. Each
        entry is the part of the path below ojluni/src/main/java/, normalized
        with os.path.normpath.

    Only lines that start with whitespace and carry a trailing backslash
    (makefile line continuation) after the .java path are recognized.
    """
    list_file = os.path.join(build_top, "libcore", "openjdk_java_files.mk")

    # Raw string: in a plain string literal the trailing "\\\s*" collapses to
    # the regex \\s* (a backslash followed by any number of letter "s"), and
    # the unknown escapes \s and \. are deprecated in Python 3.
    # Compiled once here rather than looked up for every line.
    entry_pattern = re.compile(r"\s+ojluni/src/main/java/(.+\.java)\s*\\\s*")

    result = []
    with open(list_file, "r") as f:
        for line in f:
            match = entry_pattern.match(line)
            if match:
                # convert / to the appropriate separator (e.g. \ on Windows), just in case
                result.append(os.path.normpath(match.group(1)))
    return result
70
def ojluni_path(build_top, rel_path):
    """Builds the full path of rel_path inside the ojluni source tree below build_top"""
    java_root = os.path.join(build_top, "libcore", "ojluni", "src", "main", "java")
    return os.path.join(java_root, rel_path)
74
def upstream_path(upstream_root, upstream, rel_path):
    """Locates the file at rel_path inside the given upstream checkout.

    The shared classes directory is probed first, then the solaris one; the
    first candidate that exists on disk is returned. Returns None when the
    file exists in neither directory.
    """
    upstream_dir = os.path.join(upstream_root, upstream)
    for classes_dir in ("jdk/src/share/classes", "jdk/src/solaris/classes"):
        # normpath turns the / separators into the platform's separator
        candidate = os.path.join(upstream_dir, os.path.normpath(classes_dir), rel_path)
        if os.path.exists(candidate):
            return candidate
    return None
87
# Complexity: O(N*M) time for inputs of length N and M, respectively.
# Restricting the computation to cells within D of the diagonal would
# bring this down to O(D*(N+M)) for lists whose distance is <= D.
def edit_distance(a, b):
    """
    Returns the item-based edit distance between the lists a and b,
    i.e. the minimum number of single-item deletions, insertions or
    replacements needed to turn the content of one list into the other.
    """
    # Row for the empty prefix of a: reaching b[:j] from nothing costs j.
    above = list(range(len(b) + 1))
    for row, item_a in enumerate(a, 1):
        # above[j] is the distance between a[:row-1] and b[:j];
        # current[j] becomes the distance between a[:row] and b[:j].
        current = [row]
        for col, item_b in enumerate(b, 1):
            take_from_b = current[-1] + 1
            take_from_a = above[col] + 1
            # Free when the items match, otherwise one replacement.
            match_or_replace = above[col - 1] + (0 if item_a == item_b else 1)
            current.append(min(take_from_b, take_from_a, match_or_replace))
        above = current
    return above[-1]
114
def compare_to_upstreams_and_save(out_file, build_top, upstream_root, upstreams, rel_paths):
    """
    Prints tab-separated values comparing ojluni files vs. each
    upstream, for each of the rel_paths, suitable for human
    analysis in a spreadsheet.
    This includes whether the corresponding upstream file is
    missing, identical, or by how many lines it differs, and
    a guess as to the correct upstream based on minimal line
    difference (ties broken in favor of upstreams that occur
    earlier in the list).

    Args:
        out_file: writable file object that receives the table.
        build_top: AOSP root directory, passed on to ojluni_path().
        upstream_root: directory holding one subdirectory per upstream.
        upstreams: sequence of upstream names; one output column each.
        rel_paths: relative paths of the files to compare.

    The first row is a header. guessed_upstream is left empty when the
    file is missing from every upstream.
    """
    writer = csv.writer(out_file, delimiter='\t')
    # list() so that any sequence of upstream names (e.g. a tuple) is accepted.
    writer.writerow(["rel_path", "guessed_upstream"] + list(upstreams))
    for rel_path in rel_paths:
        ojluni_file = ojluni_path(build_top, rel_path)
        upstream_comparisons = []
        # None means "no upstream contained the file so far". This replaces
        # sys.maxint, which does not exist in Python 3.
        best_distance = None
        guessed_upstream = ""
        for upstream in upstreams:
            upstream_file = upstream_path(upstream_root, upstream, rel_path)
            if upstream_file is None:
                upstream_comparisons.append("missing")
                continue
            if filecmp.cmp(upstream_file, ojluni_file, shallow=False):
                distance = 0
                upstream_comparison = "identical"
            else:
                # Read as bytes, consistent with the byte-wise filecmp check
                # above; this avoids decoding errors on sources that are not
                # valid in the default text encoding.
                with open(upstream_file, "rb") as f:
                    lines_a = f.readlines()
                with open(ojluni_file, "rb") as f:
                    lines_b = f.readlines()
                distance = edit_distance(lines_a, lines_b)
                # 0% for identical files
                # 100% for totally different files or where one file is empty
                # (the files differ, so at least one of them has a line and
                # the divisor is never zero)
                percent_different = 100.0 * distance / max(len(lines_a), len(lines_b))
                upstream_comparison = "%.1f%% different (%d lines)" % (percent_different, distance)
            # Strict comparison keeps the earliest upstream on ties.
            if best_distance is None or distance < best_distance:
                best_distance = distance
                guessed_upstream = upstream
            upstream_comparisons.append(upstream_comparison)
        writer.writerow([rel_path, guessed_upstream] + upstream_comparisons)
Tobias Thierer8d5f1162017-03-21 16:16:57 +1100156
def copy_files(rel_paths, upstream_root, upstream, output_dir):
    """Copies every rel_path that exists in upstream to the same relative location below output_dir"""
    for rel_path in rel_paths:
        source = upstream_path(upstream_root, upstream, rel_path)
        if source is None:
            # File is not part of this upstream; nothing to copy.
            continue
        destination = os.path.join(output_dir, rel_path)
        parent_dir = os.path.dirname(destination)
        # Create the directory the file goes into before the first copy.
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir)
        shutil.copyfile(source, destination)
167
def main():
    """Parses the command line, prints the comparison table and optionally copies upstream files.

    The table is written to stdout. When --output_dir is given, the files
    found in the first upstream of --upstreams are copied there afterwards.

    Raises:
        Exception: if --output_dir already exists or one of the upstream
            directories does not exist below --upstream_root.
    Exits via parser.error() if neither --build_top nor ANDROID_BUILD_TOP
    provides the Android source root.
    """
    parser = argparse.ArgumentParser(
        description="Check openjdk_java_files contents against upstream file contents.")
    parser.add_argument("--upstream_root",
        help="Path below where upstream sources are checked out. This should be a "
            "directory with one child directory for each upstream (select the "
            "upstreams to compare against via --upstreams).",
        required=True,)
    parser.add_argument("--upstreams",
        default="8u121-b13,8u60,7u40",
        help="Comma separated list of subdirectory names of --upstream_root that "
            "each hold one upstream.")
    parser.add_argument("--output_dir",
        help="(optional) path where default upstream sources should be copied to; "
            "this path must not yet exist and will be created. "
            "The default upstream is the one that occurs first in --upstreams.")
    parser.add_argument("--build_top",
        default=os.environ.get('ANDROID_BUILD_TOP'),
        help="Path where Android sources are checked out (defaults to $ANDROID_BUILD_TOP).")
    args = parser.parse_args()
    # Without this check a missing build top only surfaces later as an
    # obscure error from os.path.join(None, ...).
    if args.build_top is None:
        parser.error("--build_top was not given and ANDROID_BUILD_TOP is not set")
    if args.output_dir is not None and os.path.exists(args.output_dir):
        raise Exception("Output dir already exists: " + args.output_dir)

    upstreams = [upstream.strip() for upstream in args.upstreams.split(',')]
    default_upstream = upstreams[0]
    for upstream in upstreams:
        # Named upstream_dir so the module-level upstream_path() is not shadowed.
        upstream_dir = os.path.join(args.upstream_root, upstream)
        if not os.path.exists(upstream_dir):
            raise Exception("Upstream not found: " + upstream_dir)

    rel_paths = rel_paths_from_makefile(args.build_top)

    compare_to_upstreams_and_save(
        sys.stdout, args.build_top, args.upstream_root, upstreams, rel_paths)

    if args.output_dir is not None:
        copy_files(rel_paths, args.upstream_root, default_upstream, args.output_dir)
205
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()