#!/usr/bin/env python

from __future__ import print_function

import argparse
import email.mime.multipart
import email.mime.text
import logging
import os.path
import pickle
import re
import smtplib
import subprocess
import sys
from datetime import datetime, timedelta
from phabricator import Phabricator

# Setting up a virtualenv to run this script can be done by running the
# following commands:
# $ virtualenv venv
# $ . ./venv/bin/activate
# $ pip install Phabricator
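#
# An example invocation (assuming, hypothetically, that this script is saved
# as find_interesting_reviews.py):
# $ python find_interesting_reviews.py --email-addresses you@example.com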

GIT_REPO_METADATA = (("llvm", "https://llvm.org/git/llvm.git"), )

# The below PhabXXX classes represent objects as modelled by Phabricator.
# The classes can be serialized to disk, to avoid needlessly re-fetching
# lots of data from Phabricator, as that would make this script unusably
# slow.


class PhabObject:
    OBJECT_KIND = None

    def __init__(self, id):
        self.id = id


class PhabObjectCache:
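    """An in-memory cache of one kind of PhabObject, keyed by id.

    most_recent_info and oldest_info are the dateModified timestamps of the
    newest and oldest data fetched so far; they are used to decide whether
    more data still needs to be fetched from Phabricator."""
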
    def __init__(self, PhabObjectClass):
        self.PhabObjectClass = PhabObjectClass
        self.most_recent_info = None
        self.oldest_info = None
        self.id2PhabObjects = {}

    def get_name(self):
        return self.PhabObjectClass.OBJECT_KIND + "sCache"

    def get(self, id):
        if id not in self.id2PhabObjects:
            self.id2PhabObjects[id] = self.PhabObjectClass(id)
        return self.id2PhabObjects[id]

    def get_ids_in_cache(self):
        return list(self.id2PhabObjects.keys())

    def get_objects(self):
        return list(self.id2PhabObjects.values())

    DEFAULT_DIRECTORY = "PhabObjectCache"

    def _get_pickle_name(self, directory):
        file_name = "Phab" + self.PhabObjectClass.OBJECT_KIND + "s.pickle"
        return os.path.join(directory, file_name)

    def populate_cache_from_disk(self, directory=DEFAULT_DIRECTORY):
        """
        FIXME: consider if serializing to JSON would bring interoperability
        advantages over serializing to pickle.
        """
        try:
            f = open(self._get_pickle_name(directory), "rb")
        except IOError as err:
            print("Could not find cache. Error message: {0}. Continuing..."
                  .format(err))
        else:
            with f:
                try:
                    d = pickle.load(f)
                    self.__dict__.update(d)
                except EOFError as err:
                    print("Cache seems to be corrupt. " +
                          "Not using cache. Error message: {0}".format(err))

    def write_cache_to_disk(self, directory=DEFAULT_DIRECTORY):
        if not os.path.exists(directory):
            os.makedirs(directory)
        with open(self._get_pickle_name(directory), "wb") as f:
            pickle.dump(self.__dict__, f)
        print("wrote cache to disk, most_recent_info= {0}".format(
            datetime.fromtimestamp(self.most_recent_info)
            if self.most_recent_info is not None else None))


class PhabReview(PhabObject):
    OBJECT_KIND = "Review"

    def __init__(self, id):
        PhabObject.__init__(self, id)

    def update(self, title, dateCreated, dateModified, author):
        self.title = title
        self.dateCreated = dateCreated
        self.dateModified = dateModified
        self.author = author

    def setPhabDiffs(self, phabDiffs):
        self.phabDiffs = phabDiffs


class PhabUser(PhabObject):
    OBJECT_KIND = "User"

    def __init__(self, id):
        PhabObject.__init__(self, id)

    def update(self, phid, realName):
        self.phid = phid
        self.realName = realName


class PhabHunk:
    def __init__(self, rest_api_hunk):
        self.oldOffset = int(rest_api_hunk["oldOffset"])
        self.oldLength = int(rest_api_hunk["oldLength"])
        # self.actual_lines_changed_offset will contain the offsets of the
        # lines that were changed in this hunk.
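        # Worked example: with oldOffset 10 and corpus
        #     " context\n-removed\n+added\n context"
        # the removed line sits at old offset 11, so the loop below records
        # the single range (10, 15): up to 3 context lines are added on
        # either side, with the start clamped to oldOffset.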
        self.actual_lines_changed_offset = []
        offset = self.oldOffset
        inHunk = False
        hunkStart = -1
        contextLines = 3
        for line in rest_api_hunk["corpus"].split("\n"):
            if line.startswith("+"):
                # line is a new line that got introduced in this patch.
                # Do not record it as a changed line.
                if inHunk is False:
                    inHunk = True
                    hunkStart = max(self.oldOffset, offset - contextLines)
                continue
            if line.startswith("-"):
                # line was changed or removed from the older version of the
                # code. Record it as a changed line.
                if inHunk is False:
                    inHunk = True
                    hunkStart = max(self.oldOffset, offset - contextLines)
                offset += 1
                continue
            # line is a context line.
            if inHunk is True:
                inHunk = False
                hunkEnd = offset + contextLines
                self.actual_lines_changed_offset.append((hunkStart, hunkEnd))
            offset += 1
        if inHunk is True:
            hunkEnd = offset + contextLines
            self.actual_lines_changed_offset.append((hunkStart, hunkEnd))

        # The above algorithm could result in adjacent or overlapping ranges
        # being recorded into self.actual_lines_changed_offset.
        # Merge the adjacent and overlapping ranges in there:
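        # For example, [(10, 15), (14, 20), (30, 33)] becomes
        # [(10, 20), (30, 33)]. The (sys.maxsize, sys.maxsize) sentinel
        # ensures the last accumulated range is also appended to t.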
        t = []
        lastRange = None
        for start, end in self.actual_lines_changed_offset + \
                [(sys.maxsize, sys.maxsize)]:
            if lastRange is None:
                lastRange = (start, end)
            else:
                if lastRange[1] >= start:
                    lastRange = (lastRange[0], end)
                else:
                    t.append(lastRange)
                    lastRange = (start, end)
        self.actual_lines_changed_offset = t


class PhabChange:
    def __init__(self, rest_api_change):
        self.oldPath = rest_api_change["oldPath"]
        self.hunks = [PhabHunk(h) for h in rest_api_change["hunks"]]


class PhabDiff(PhabObject):
    OBJECT_KIND = "Diff"

    def __init__(self, id):
        PhabObject.__init__(self, id)

    def update(self, rest_api_results):
        self.revisionID = rest_api_results["revisionID"]
        self.dateModified = int(rest_api_results["dateModified"])
        self.dateCreated = int(rest_api_results["dateCreated"])
        self.changes = [PhabChange(c) for c in rest_api_results["changes"]]


class ReviewsCache(PhabObjectCache):
    def __init__(self):
        PhabObjectCache.__init__(self, PhabReview)


class UsersCache(PhabObjectCache):
    def __init__(self):
        PhabObjectCache.__init__(self, PhabUser)


reviews_cache = ReviewsCache()
users_cache = UsersCache()


def init_phab_connection():
    phab = Phabricator()
    phab.update_interfaces()
    return phab


def update_cached_info(phab, cache, phab_query, order, record_results,
                       max_nr_entries_per_fetch, max_nr_days_to_cache):
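    """Fetch new results from Phabricator and merge them into cache.

    phab_query is a tuple of attribute names, e.g. ("user", "search"),
    naming the Conduit method to page through on the phab connection.
    record_results is a callback that stores one page of results into the
    cache and returns the (most recent, oldest) timestamps it saw. Fetching
    stops once all records were fetched, or once the cache already covers
    max_nr_days_to_cache days before the most recent record.
    """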
    q = phab
    LIMIT = max_nr_entries_per_fetch
    for query_step in phab_query:
        q = getattr(q, query_step)
    results = q(order=order, limit=LIMIT)
    most_recent_info, oldest_info = record_results(cache, results, phab)
    oldest_info_to_fetch = datetime.fromtimestamp(most_recent_info) - \
        timedelta(days=max_nr_days_to_cache)
    most_recent_info_overall = most_recent_info
    cache.write_cache_to_disk()
    after = results["cursor"]["after"]
    print("after: {0!r}".format(after))
    print("most_recent_info: {0}".format(
        datetime.fromtimestamp(most_recent_info)))
    while (after is not None
           and datetime.fromtimestamp(oldest_info) > oldest_info_to_fetch):
        need_more_older_data = \
            (cache.oldest_info is None or
             datetime.fromtimestamp(cache.oldest_info) > oldest_info_to_fetch)
        print(("need_more_older_data={0} cache.oldest_info={1} " +
               "oldest_info_to_fetch={2}").format(
                   need_more_older_data,
                   datetime.fromtimestamp(cache.oldest_info)
                   if cache.oldest_info is not None else None,
                   oldest_info_to_fetch))
        need_more_newer_data = \
            (cache.most_recent_info is None or
             cache.most_recent_info < most_recent_info)
        print(("need_more_newer_data={0} cache.most_recent_info={1} " +
               "most_recent_info={2}")
              .format(need_more_newer_data, cache.most_recent_info,
                      most_recent_info))
        if not need_more_older_data and not need_more_newer_data:
            break
        results = q(order=order, after=after, limit=LIMIT)
        most_recent_info, oldest_info = record_results(cache, results, phab)
        after = results["cursor"]["after"]
        print("after: {0!r}".format(after))
        print("most_recent_info: {0}".format(
            datetime.fromtimestamp(most_recent_info)))
        cache.write_cache_to_disk()
    cache.most_recent_info = most_recent_info_overall
    if after is None:
        # We did fetch all records. Mark the cache to contain all info since
        # the start of time.
        oldest_info = 0
    cache.oldest_info = oldest_info
    cache.write_cache_to_disk()


def record_reviews(cache, reviews, phab):
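    """Store one page of differential.revision.search results into cache.

    For reviews that are new or were modified since they were last cached,
    also fetch all associated diffs. Returns the newest and oldest
    dateModified timestamps seen on this page of results.
    """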
    most_recent_info = None
    oldest_info = None
    for reviewInfo in reviews["data"]:
        if reviewInfo["type"] != "DREV":
            continue
        id = reviewInfo["id"]
        # phid = reviewInfo["phid"]
        dateModified = int(reviewInfo["fields"]["dateModified"])
        dateCreated = int(reviewInfo["fields"]["dateCreated"])
        title = reviewInfo["fields"]["title"]
        author = reviewInfo["fields"]["authorPHID"]
        phabReview = cache.get(id)
        if "dateModified" not in phabReview.__dict__ or \
                dateModified > phabReview.dateModified:
            diff_results = phab.differential.querydiffs(revisionIDs=[id])
            diff_ids = sorted(diff_results.keys())
            phabDiffs = []
            for diff_id in diff_ids:
                diffInfo = diff_results[diff_id]
                d = PhabDiff(diff_id)
                d.update(diffInfo)
                phabDiffs.append(d)
            phabReview.update(title, dateCreated, dateModified, author)
            phabReview.setPhabDiffs(phabDiffs)
            print("Updated D{0} modified on {1} ({2} diffs)".format(
                id, datetime.fromtimestamp(dateModified), len(phabDiffs)))

        if most_recent_info is None:
            most_recent_info = dateModified
        elif most_recent_info < dateModified:
            most_recent_info = dateModified

        if oldest_info is None:
            oldest_info = dateModified
        elif oldest_info > dateModified:
            oldest_info = dateModified
    return most_recent_info, oldest_info


def record_users(cache, users, phab):
    most_recent_info = None
    oldest_info = None
    for info in users["data"]:
        if info["type"] != "USER":
            continue
        id = info["id"]
        phid = info["phid"]
        dateModified = int(info["fields"]["dateModified"])
        # dateCreated = int(info["fields"]["dateCreated"])
        realName = info["fields"]["realName"]
        phabUser = cache.get(id)
        phabUser.update(phid, realName)
        if most_recent_info is None:
            most_recent_info = dateModified
        elif most_recent_info < dateModified:
            most_recent_info = dateModified
        if oldest_info is None:
            oldest_info = dateModified
        elif oldest_info > dateModified:
            oldest_info = dateModified
    return most_recent_info, oldest_info


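# Each entry is a tuple of (cache, phab_query, order, record_results,
# max_nr_entries_per_fetch, max_nr_days_to_cache), as unpacked by
# load_cache and update_cache below.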
PHABCACHESINFO = ((reviews_cache, ("differential", "revision", "search"),
                   "updated", record_reviews, 5, 7),
                  (users_cache, ("user", "search"), "newest", record_users,
                   100, 1000))


def load_cache():
    for cache, phab_query, order, record_results, _, _ in PHABCACHESINFO:
        cache.populate_cache_from_disk()
        print("Loaded {0}: {1} entries".format(
            cache.get_name(), len(cache.get_ids_in_cache())))
        print("{0} most recent info: {1}".format(
            cache.get_name(),
            datetime.fromtimestamp(cache.most_recent_info)
            if cache.most_recent_info is not None else None))


def update_cache(phab):
    load_cache()
    for cache, phab_query, order, record_results, max_nr_entries_per_fetch, \
            max_nr_days_to_cache in PHABCACHESINFO:
        update_cached_info(phab, cache, phab_query, order, record_results,
                           max_nr_entries_per_fetch, max_nr_days_to_cache)
        ids_in_cache = cache.get_ids_in_cache()
        print("{0} objects in {1}".format(len(ids_in_cache), cache.get_name()))
        cache.write_cache_to_disk()


def get_most_recent_reviews(days):
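    """Return the cached reviews modified within `days` days of the most
    recently modified review, most recently modified first."""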
    newest_reviews = sorted(
        reviews_cache.get_objects(), key=lambda r: -r.dateModified)
    if len(newest_reviews) == 0:
        return newest_reviews
    most_recent_review_time = \
        datetime.fromtimestamp(newest_reviews[0].dateModified)
    cut_off_date = most_recent_review_time - timedelta(days=days)
    result = []
    for review in newest_reviews:
        if datetime.fromtimestamp(review.dateModified) < cut_off_date:
            return result
        result.append(review)
    return result



# All of the above code is about fetching data from Phabricator and caching it
# on local disk. The below code contains the actual "business logic" for this
# script.

_userphid2realname = None


def get_real_name_from_author(user_phid):
    global _userphid2realname
    if _userphid2realname is None:
        _userphid2realname = {}
        for user in users_cache.get_objects():
            _userphid2realname[user.phid] = user.realName
    return _userphid2realname.get(user_phid, "unknown")


def print_most_recent_reviews(phab, days, filter_reviewers):
    msgs = []

    def add_msg(msg):
        msgs.append(msg)
        print(msg)

    newest_reviews = get_most_recent_reviews(days)
    add_msg(u"These are the reviews that look interesting to review. " +
            u"The report below has 2 sections. The first section is " +
            u"organized per review; the second section is organized " +
            u"per potential reviewer.\n")
    oldest_review = newest_reviews[-1] if len(newest_reviews) > 0 else None
    oldest_datetime = \
        datetime.fromtimestamp(oldest_review.dateModified) \
        if oldest_review else None
    add_msg((u"The report below is based on analyzing the reviews that got " +
             u"touched in the past {0} days (since {1}). " +
             u"The script found {2} such reviews.\n").format(
                 days, oldest_datetime, len(newest_reviews)))
    reviewer2reviews_and_scores = {}
    for i, review in enumerate(newest_reviews):
        matched_reviewers = find_reviewers_for_review(review)
        matched_reviewers = filter_reviewers(matched_reviewers)
        if len(matched_reviewers) == 0:
            continue
        add_msg((u"{0:>3}. https://reviews.llvm.org/D{1} by {2}\n {3}\n" +
                 u" Last updated on {4}").format(
                     i, review.id,
                     get_real_name_from_author(review.author), review.title,
                     datetime.fromtimestamp(review.dateModified)))
        for reviewer, scores in matched_reviewers:
            add_msg(u" potential reviewer {0}, score {1}".format(
                reviewer,
                "(" + "/".join(["{0:.1f}%".format(s) for s in scores]) + ")"))
            if reviewer not in reviewer2reviews_and_scores:
                reviewer2reviews_and_scores[reviewer] = []
            reviewer2reviews_and_scores[reviewer].append((review, scores))

    # Print out a summary per reviewer.
    for reviewer in sorted(reviewer2reviews_and_scores.keys()):
        reviews_and_scores = reviewer2reviews_and_scores[reviewer]
        reviews_and_scores.sort(key=lambda rs: rs[1], reverse=True)
        add_msg(u"\n\nSUMMARY FOR {0} (found {1} reviews):".format(
            reviewer, len(reviews_and_scores)))
        for review, scores in reviews_and_scores:
            add_msg(u"[{0}] https://reviews.llvm.org/D{1} '{2}' by {3}".format(
                "/".join(["{0:.1f}%".format(s) for s in scores]), review.id,
                review.title, get_real_name_from_author(review.author)))
    return "\n".join(msgs)


def get_git_cmd_output(cmd):
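    """Run cmd in a shell and return its output decoded as UTF-8, or None
    if the command failed."""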
    output = None
    try:
        logging.debug(cmd)
        output = subprocess.check_output(
            cmd, shell=True, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        logging.debug(str(e))
    if output is None:
        return None
    return output.decode("utf-8", errors='ignore')


reAuthorMail = re.compile("^author-mail <([^>]*)>.*$")


def parse_blame_output_line_porcelain(blame_output):
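    """Count how many lines `git blame --line-porcelain` attributes to
    each author e-mail address.

    Each blamed line comes with a header that contains, among other
    fields, a line of the form (hypothetical address):
        author-mail <jane@example.com>
    Returns a dict mapping each e-mail address to the number of times it
    occurs in blame_output.
    """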
    email2nr_occurences = {}
    if blame_output is None:
        return email2nr_occurences
    for line in blame_output.split('\n'):
        m = reAuthorMail.match(line)
        if m:
            author_email_address = m.group(1)
            if author_email_address not in email2nr_occurences:
                email2nr_occurences[author_email_address] = 1
            else:
                email2nr_occurences[author_email_address] += 1
    return email2nr_occurences


def find_reviewers_for_diff_heuristic(diff):
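    """Return a list of (reviewer, (line_score, file_score)) tuples,
    sorted by decreasing score.

    line_score is the percentage of blamed lines in the ranges this diff
    touches that were last modified by that reviewer; file_score is the
    percentage of the files touched by this diff in which that reviewer
    appears in the blame output.
    """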
    # Heuristic 1: assume good reviewers are the ones that touched the same
    # lines before as this patch is touching.
    # Heuristic 2: assume good reviewers are the ones that touched the same
    # files before as this patch is touching.
    reviewers2nr_lines_touched = {}
    reviewers2nr_files_touched = {}
    # Assume the last revision before the diff was modified is the revision
    # the diff applies to.
    git_repo = "git_repos/llvm"
    cmd = 'git -C {0} rev-list -n 1 --before="{1}" master'.format(
        git_repo,
        datetime.fromtimestamp(
            diff.dateModified).strftime("%Y-%m-%d %H:%M:%S"))
    base_revision = get_git_cmd_output(cmd).strip()
    logging.debug("Base revision={0}".format(base_revision))
    for change in diff.changes:
        path = change.oldPath
        # Compute heuristic 1: look at context of patch lines.
        for hunk in change.hunks:
            for start_line, end_line in hunk.actual_lines_changed_offset:
                # Collect git blame results for authors in those ranges.
                cmd = ("git -C {0} blame --encoding=utf-8 --date iso -f -e " +
                       "-w --line-porcelain -L {1},{2} {3} -- {4}").format(
                           git_repo, start_line, end_line, base_revision, path)
                blame_output = get_git_cmd_output(cmd)
                for reviewer, nr_occurences in \
                        parse_blame_output_line_porcelain(blame_output).items():
                    if reviewer not in reviewers2nr_lines_touched:
                        reviewers2nr_lines_touched[reviewer] = 0
                    reviewers2nr_lines_touched[reviewer] += nr_occurences
        # Compute heuristic 2: don't look at context, just at files touched.
        # Collect git blame results for authors in those ranges.
        cmd = ("git -C {0} blame --encoding=utf-8 --date iso -f -e -w " +
               "--line-porcelain {1} -- {2}").format(git_repo, base_revision,
                                                     path)
        blame_output = get_git_cmd_output(cmd)
        for reviewer, nr_occurences in parse_blame_output_line_porcelain(
                blame_output).items():
            if reviewer not in reviewers2nr_files_touched:
                reviewers2nr_files_touched[reviewer] = 0
            reviewers2nr_files_touched[reviewer] += 1

    # Compute "match scores"
    total_nr_lines = sum(reviewers2nr_lines_touched.values())
    total_nr_files = len(diff.changes)
    reviewers_matchscores = \
        [(reviewer,
          (reviewers2nr_lines_touched.get(reviewer, 0)*100.0/total_nr_lines
           if total_nr_lines != 0 else 0,
           reviewers2nr_files_touched[reviewer]*100.0/total_nr_files
           if total_nr_files != 0 else 0))
         for reviewer in reviewers2nr_files_touched]
    reviewers_matchscores.sort(key=lambda i: i[1], reverse=True)
    return reviewers_matchscores


def find_reviewers_for_review(review):
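    """Return scored potential reviewers for the newest diff of review,
    or an empty list if the review has no diffs."""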
    # Process the newest diff first.
    diffs = sorted(
        review.phabDiffs, key=lambda d: d.dateModified, reverse=True)
    if len(diffs) == 0:
        return []
    diff = diffs[0]
    matched_reviewers = find_reviewers_for_diff_heuristic(diff)
    # Show progress, as this is a slow operation:
    sys.stdout.write('.')
    sys.stdout.flush()
    logging.debug(u"matched_reviewers: {0}".format(matched_reviewers))
    return matched_reviewers


def update_git_repos():
    git_repos_directory = "git_repos"
    for name, url in GIT_REPO_METADATA:
        dirname = os.path.join(git_repos_directory, name)
        if not os.path.exists(dirname):
            cmd = "git clone {0} {1}".format(url, dirname)
            output = get_git_cmd_output(cmd)
        cmd = "git -C {0} pull --rebase".format(dirname)
        output = get_git_cmd_output(cmd)


def send_emails(email_addresses, sender, msg):
    s = smtplib.SMTP()
    s.connect()
    for email_address in email_addresses:
        email_msg = email.mime.multipart.MIMEMultipart()
        email_msg['From'] = sender
        email_msg['To'] = email_address
        email_msg['Subject'] = 'LLVM patches you may be able to review.'
        email_msg.attach(email.mime.text.MIMEText(msg, 'plain', 'utf-8'))
        # python 3.x: s.send_message(email_msg)
        s.sendmail(email_msg['From'], email_msg['To'], email_msg.as_string())
    s.quit()


def filter_reviewers_to_report_for(people_to_look_for):
    # The below is just an example filter, to only report potential reviews
    # to do for the people that will receive the report email.
    return lambda potential_reviewers: [r for r in potential_reviewers
                                        if r[0] in people_to_look_for]


def main():
    parser = argparse.ArgumentParser(
        description='Match open reviews to potential reviewers.')
    parser.add_argument(
        '--no-update-cache',
        dest='update_cache',
        action='store_false',
        default=True,
        help='Do not update cached Phabricator objects')
    parser.add_argument(
        '--email-report',
        dest='email_report',
        nargs='*',
        default="",
        help="The email addresses to send the report to.")
    parser.add_argument(
        '--sender',
        dest='sender',
        default="",
        help="The email address to use in 'From' on messages emailed out.")
    parser.add_argument(
        '--email-addresses',
        dest='email_addresses',
        nargs='*',
        help="The email addresses (as known by LLVM git) of " +
        "the people to look for reviews for.")
    parser.add_argument('--verbose', '-v', action='count', default=0)

    args = parser.parse_args()

    if args.verbose >= 1:
        logging.basicConfig(level=logging.DEBUG)

    people_to_look_for = [
        e.decode('utf-8') if isinstance(e, bytes) else e
        for e in (args.email_addresses or [])]
    logging.debug("Will look for reviews that the following contributors " +
                  "could review: {}".format(people_to_look_for))
    logging.debug("Will email a report to: {}".format(args.email_report))

    phab = init_phab_connection()

    if args.update_cache:
        update_cache(phab)

    load_cache()
    update_git_repos()
    msg = print_most_recent_reviews(
        phab,
        days=1,
        filter_reviewers=filter_reviewers_to_report_for(people_to_look_for))

    if args.email_report:
        send_emails(args.email_report, args.sender, msg)


if __name__ == "__main__":
    main()